/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_generator.cpp
 *
 * This file supports generating code from the FS LIR to the actual
 * native instructions.
 */

#include "brw_eu.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_program.h"

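/**
 * Map a logical IR register file onto the hardware register file encodings
 * used by the EU instruction encoding.
 */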
static enum brw_reg_file
brw_file_from_reg(fs_reg *reg)
{
   switch (reg->file) {
   case ARF:
      return BRW_ARCHITECTURE_REGISTER_FILE;
   case FIXED_GRF:
   case VGRF:
      return BRW_GENERAL_REGISTER_FILE;
   case MRF:
      return BRW_MESSAGE_REGISTER_FILE;
   case IMM:
      return BRW_IMMEDIATE_VALUE;
   case BAD_FILE:
   case ATTR:
   case UNIFORM:
      unreachable("not reached");
   }
   return BRW_ARCHITECTURE_REGISTER_FILE;
}

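/**
 * Convert an IR fs_reg into a hardware brw_reg, including the region
 * description.  For GRF/MRF registers the region is chosen as wide as
 * possible without crossing a GRF boundary or the execution size of a
 * single decompressed chunk of the instruction.
 */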
static struct brw_reg
brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case MRF:
      assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen));
      /* Fallthrough */
   case VGRF:
      if (reg->stride == 0) {
         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
      } else {
         /* From the Haswell PRM:
          *
          *  "VertStride must be used to cross GRF register boundaries. This
          *   rule implies that elements within a 'Width' cannot cross GRF
          *   boundaries."
          *
          * The maximum width value that could satisfy this restriction is:
          */
         const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type));

         /* Because the hardware can only split source regions at a whole
          * multiple of width during decompression (i.e. vertically), clamp
          * the value obtained above to the physical execution size of a
          * single decompressed chunk of the instruction:
          */
         const unsigned phys_width = compressed ? inst->exec_size / 2 :
                                     inst->exec_size;

         /* XXX - The equation above is strictly speaking not correct on
          *       hardware that supports unbalanced GRF writes -- On Gen9+
          *       each decompressed chunk of the instruction may have a
          *       different execution size when the number of components
          *       written to each destination GRF is not the same.
          */
         const unsigned width = MIN2(reg_width, phys_width);
         brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
         brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
      }

      brw_reg = retype(brw_reg, reg->type);
      brw_reg = byte_offset(brw_reg, reg->offset);
      brw_reg.abs = reg->abs;
      brw_reg.negate = reg->negate;
      break;
   case ARF:
   case FIXED_GRF:
   case IMM:
      assert(reg->offset == 0);
      brw_reg = reg->as_brw_reg();
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case ATTR:
   case UNIFORM:
      unreachable("not reached");
   }

   return brw_reg;
}

fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
                           void *mem_ctx,
                           const void *key,
                           struct brw_stage_prog_data *prog_data,
                           unsigned promoted_constants,
                           bool runtime_check_aads_emit,
                           gl_shader_stage stage)

   : compiler(compiler), log_data(log_data),
     devinfo(compiler->devinfo), key(key),
     prog_data(prog_data),
     promoted_constants(promoted_constants),
     runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
     stage(stage), mem_ctx(mem_ctx)
{
   p = rzalloc(mem_ctx, struct brw_codegen);
   brw_init_codegen(devinfo, p, mem_ctx);
}

fs_generator::~fs_generator()
{
}

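/**
 * List node remembering the instruction-store offset of an emitted HALT, so
 * that its jump targets can be patched once the final HALT is emitted (see
 * patch_discard_jumps_to_fb_writes()).
 */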
class ip_record : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(ip_record)

   ip_record(int ip)
   {
      this->ip = ip;
   }

   int ip;
};

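/**
 * Emit a final HALT and point the UIP of every previously recorded discard
 * HALT at it.  Returns whether any patching was performed.
 */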
bool
fs_generator::patch_discard_jumps_to_fb_writes()
{
   if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
      return false;

   int scale = brw_jump_scale(p->devinfo);

   /* There is a somewhat strange undocumented requirement of using
    * HALT, according to the simulator.  If some channel has HALTed to
    * a particular UIP, then by the end of the program, every channel
    * must have HALTed to that UIP.  Furthermore, the tracking is a
    * stack, so you can't do the final halt of a UIP after starting
    * halting to a new UIP.
    *
    * Symptoms of not emitting this instruction on actual hardware
    * included GPU hangs and sparkly rendering on the piglit discard
    * tests.
    */
   brw_inst *last_halt = gen6_HALT(p);
   brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
   brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);

   int ip = p->nr_insn;

   foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
      brw_inst *patch = &p->store[patch_ip->ip];

      assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
      /* HALT takes a half-instruction distance from the pre-incremented IP. */
      brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
   }

   this->discard_halt_patches.make_empty();
   return true;
}

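/**
 * Emit a single render target write SEND, selecting the message control
 * from replicated-data writes, dual-source blending and the execution size.
 * On Gen4-5 this also loads g1 into the second payload register first.
 */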
void
fs_generator::fire_fb_write(fs_inst *inst,
                            struct brw_reg payload,
                            struct brw_reg implied_header,
                            GLuint nr)
{
   uint32_t msg_control;

   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);

   if (devinfo->gen < 6) {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, offset(payload, 1), brw_vec8_grf(1, 0));
      brw_pop_insn_state(p);
   }

   if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
   else if (prog_data->dual_src_blend) {
      if (!inst->group)
         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
      else
         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
   } else if (inst->exec_size == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   uint32_t surf_index =
      prog_data->binding_table.render_target_start + inst->target;

   bool last_render_target = inst->eot ||
                             (prog_data->dual_src_blend && dispatch_width == 16);

   brw_fb_WRITE(p,
                payload,
                implied_header,
                msg_control,
                surf_index,
                nr,
                0,
                inst->eot,
                last_render_target,
                inst->header_size != 0);

   brw_mark_surface_used(&prog_data->base, surf_index);
}

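/**
 * Set up the FB write message header (if any) and fire the write.  With
 * runtime_check_aads_emit (Gen4-5 only), a conditional jump decides at run
 * time whether the antialiasing data in the payload is sent.
 */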
void
fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
   const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
   struct brw_reg implied_header;

   if (devinfo->gen < 8 && !devinfo->is_haswell) {
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   }

   if (inst->base_mrf >= 0)
      payload = brw_message_reg(inst->base_mrf);

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   if (inst->header_size != 0) {
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* On HSW, the GPU will use the predicate on SENDC, unless the header is
       * present.
       */
      if (prog_data->uses_kill) {
         struct brw_reg pixel_mask;

         if (devinfo->gen >= 6)
            pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
         else
            pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
      }

      if (devinfo->gen >= 6) {
         brw_push_insn_state(p);
         brw_set_default_exec_size(p, BRW_EXECUTE_16);
         brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(payload, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_pop_insn_state(p);

         if (inst->target > 0 && key->replicate_alpha) {
            /* Set "Source0 Alpha Present to RenderTarget" bit in message
             * header.
             */
            brw_OR(p,
                   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   brw_imm_ud(0x1 << 11));
         }

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(vec1(suboffset(payload, 2)),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         /* Indicate that the shader sends a computed stencil value to the
          * render target.
          */
         if (prog_data->computed_stencil) {
            brw_OR(p,
                   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   brw_imm_ud(0x1 << 14));
         }

         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_pop_insn_state(p);
   } else {
      implied_header = brw_null_reg();
   }

   if (!runtime_check_aads_emit) {
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   } else {
      /* This can only happen in gen < 6 */
      assert(devinfo->gen < 6);

      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

      /* Check runtime bit to detect if we have to send AA data or not */
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p,
              v1_null_ud,
              retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(1<<26));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
      brw_inst_set_exec_size(p->devinfo, brw_last_inst, BRW_EXECUTE_1);
      {
         /* Don't send AA data */
         fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
      }
      brw_land_fwd_jump(p, jmp);
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   }
}

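/**
 * Emit a Gen9+ render target read message, as used for framebuffer fetch.
 */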
void
fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
                               struct brw_reg payload)
{
   assert(inst->size_written % REG_SIZE == 0);
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
   const unsigned surf_index =
      prog_data->binding_table.render_target_start + inst->target;

   gen9_fb_READ(p, dst, payload, surf_index,
                inst->header_size, inst->size_written / REG_SIZE,
                prog_data->persample_dispatch);

   brw_mark_surface_used(&prog_data->base, surf_index);
}

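/**
 * Move from a GRF addressed indirectly by a byte offset: a constant offset
 * is folded directly into the register number, while a variable offset goes
 * through the address register using VxH indirect addressing.
 */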
void
fs_generator::generate_mov_indirect(fs_inst *inst,
                                    struct brw_reg dst,
                                    struct brw_reg reg,
                                    struct brw_reg indirect_byte_offset)
{
   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);

   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;

   if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
      imm_byte_offset += indirect_byte_offset.ud;

      reg.nr = imm_byte_offset / REG_SIZE;
      reg.subnr = imm_byte_offset % REG_SIZE;
      brw_MOV(p, dst, reg);
   } else {
      /* Prior to Broadwell, there are only 8 address registers. */
      assert(inst->exec_size == 8 || devinfo->gen >= 8);

      /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
      struct brw_reg addr = vec8(brw_address_reg(0));

      /* The destination stride of an instruction (in bytes) must be greater
       * than or equal to the size of the rest of the instruction.  Since the
       * address register is of type UW, we can't use a D-type instruction.
       * In order to get around this, we retype to UW and use a stride.
       */
      indirect_byte_offset =
         retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);

      /* There are a number of reasons why we don't use the base offset here.
       * One reason is that the field is only 9 bits which means we can only
       * use it to access the first 16 GRFs.  Also, from the Haswell PRM
       * section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset.  The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address.  Any overflow from sub-register offset is dropped."
       *
       * Since the indirect may cause us to cross a register boundary, this
       * makes the base offset almost useless.  We could try and do something
       * clever where we use an actual base offset if base_offset % 32 == 0
       * but that would mean we were generating different code depending on
       * the base offset.  Instead, for the sake of consistency, we'll just
       * do the add ourselves.  This restriction is only listed in the
       * Haswell PRM but empirical testing indicates that it applies on all
       * older generations and is lifted on Broadwell.
       *
       * In the end, while base_offset is nice to look at in the generated
       * code, using it saves us 0 instructions and would require quite a bit
       * of case-by-case work.  It's just not worth it.
       */
      brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
      struct brw_reg ind_src = brw_VxH_indirect(0, 0);

      brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));

      if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
          !inst->get_next()->is_tail_sentinel() &&
          ((fs_inst *)inst->get_next())->mlen > 0) {
         /* From the Sandybridge PRM:
          *
          *    "[Errata: DevSNB(SNB)] If MRF register is updated by any
          *    instruction that “indexed/indirect” source AND is followed
          *    by a send, the instruction requires a “Switch”. This is to
          *    avoid race condition where send may dispatch before MRF is
          *    updated."
          */
         brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
      }
   }
}

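/**
 * Emit a SIMD8 URB read message.  The PER_SLOT variant of the opcode
 * additionally enables the per-slot offset bit in the message descriptor.
 */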
void
fs_generator::generate_urb_read(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg header)
{
   assert(inst->size_written % REG_SIZE == 0);
   assert(header.file == BRW_GENERAL_REGISTER_FILE);
   assert(header.type == BRW_REGISTER_TYPE_UD);

   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, send, header);
   brw_set_src1(p, send, brw_imm_ud(0u));

   brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
   brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);

   if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT)
      brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);

   brw_inst_set_mlen(p->devinfo, send, inst->mlen);
   brw_inst_set_rlen(p->devinfo, send, inst->size_written / REG_SIZE);
   brw_inst_set_header_present(p->devinfo, send, true);
   brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
}

void
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
{
   brw_inst *insn;

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, brw_null_reg());
   brw_set_src0(p, insn, payload);
   brw_set_src1(p, insn, brw_imm_d(0));

   brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
   brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);

   if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
       inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
      brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true);

   if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
       inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
      brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true);

   brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
   brw_inst_set_rlen(p->devinfo, insn, 0);
   brw_inst_set_eot(p->devinfo, insn, inst->eot);
   brw_inst_set_header_present(p->devinfo, insn, true);
   brw_inst_set_urb_global_offset(p->devinfo, insn, inst->offset);
}

void
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
{
   struct brw_inst *insn;

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
   brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Terminate a compute shader by sending a message to the thread spawner.
    */
   brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
   brw_inst_set_mlen(devinfo, insn, 1);
   brw_inst_set_rlen(devinfo, insn, 0);
   brw_inst_set_eot(devinfo, insn, inst->eot);
   brw_inst_set_header_present(devinfo, insn, false);

   brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
   brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */

   /* Note that even though the thread has a URB resource associated with it,
    * we set the "do not dereference URB" bit, because the URB resource is
    * managed by the fixed-function unit, so it will free it automatically.
    */
   brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */

   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}

void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
   brw_barrier(p, src);
   brw_WAIT(p);
}

void
fs_generator::generate_linterp(fs_inst *inst,
                               struct brw_reg dst, struct brw_reg *src)
{
   /* PLN reads:
    *                      /   in SIMD16   \
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
    *    -----------------------------------
    *
    * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys:
    *
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|        |        | in SIMD8
    *   |-----------------------------------|
    *   |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
    *    -----------------------------------
    *
    * See also: emit_interpolation_setup_gen4().
    */
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = offset(src[0], inst->exec_size / 8);
   struct brw_reg interp = src[1];

   if (devinfo->has_pln &&
       (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

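/**
 * Emit a sampler resinfo message to retrieve the dimensions of a buffer or
 * texture, returned as SINT32 values in dst.
 */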
void
fs_generator::generate_get_buffer_size(fs_inst *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg surf_index)
{
   assert(devinfo->gen >= 7);
   assert(surf_index.file == BRW_IMMEDIATE_VALUE);

   uint32_t simd_mode;
   int rlen = 4;

   switch (inst->exec_size) {
   case 8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      break;
   case 16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      break;
   default:
      unreachable("Invalid width for texture instruction");
   }

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surf_index.ud,
              0,
              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
              rlen, /* response length */
              inst->mlen,
              inst->header_size > 0,
              simd_mode,
              BRW_SAMPLER_RETURN_FORMAT_SINT32);

   brw_mark_surface_used(prog_data, surf_index.ud);
}

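/**
 * Translate a texturing instruction into a SEND to the sampler, choosing
 * the message type from the opcode and hardware generation.  Constant
 * surface/sampler indices are encoded directly in the message descriptor;
 * variable indices are combined into an address register and sent with an
 * indirect descriptor.
 */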
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                           struct brw_reg surface_index,
                           struct brw_reg sampler_index)
{
   assert(inst->size_written % REG_SIZE == 0);
   int msg_type = -1;
   uint32_t simd_mode;
   uint32_t return_format;
   bool is_combined_send = inst->eot;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
    * is set as part of the message descriptor.  On gen4, the PRM seems to
    * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
    * later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
    * gone from the message descriptor entirely and you just get UINT32 all
    * the time regardless.  Since we can really only do non-UINT32 on gen4,
    * just stomp it to UINT32 all the time.
    */
   if (inst->opcode == SHADER_OPCODE_TXS)
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;

   switch (inst->exec_size) {
   case 8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      break;
   case 16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      break;
   default:
      unreachable("Invalid width for texture instruction");
   }

   if (devinfo->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXL_LZ:
         assert(devinfo->gen >= 9);
         if (inst->shadow_compare) {
            msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ;
         } else {
            msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
         }
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered in NIR */
            assert(devinfo->gen >= 8 || devinfo->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_LZ:
         assert(devinfo->gen >= 9);
         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
         break;
      case SHADER_OPCODE_TXF_CMS_W:
         assert(devinfo->gen >= 9);
         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
         break;
      case SHADER_OPCODE_TXF_CMS:
         if (devinfo->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_UMS:
         assert(devinfo->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
         break;
      case SHADER_OPCODE_TXF_MCS:
         assert(devinfo->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
         break;
      case SHADER_OPCODE_LOD:
         msg_type = GEN5_SAMPLER_MESSAGE_LOD;
         break;
      case SHADER_OPCODE_TG4:
         if (inst->shadow_compare) {
            assert(devinfo->gen >= 7);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
         } else {
            assert(devinfo->gen >= 6);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         }
         break;
      case SHADER_OPCODE_TG4_OFFSET:
         assert(devinfo->gen >= 7);
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
         }
         break;
      case SHADER_OPCODE_SAMPLEINFO:
         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
         break;
      default:
         unreachable("not reached");
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch
          * width from message length for most messages.
          */
         if (inst->exec_size == 8) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
            if (inst->shadow_compare) {
               assert(inst->mlen == 6);
            } else {
               assert(inst->mlen <= 4);
            }
         } else {
            if (inst->shadow_compare) {
               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
               assert(inst->mlen == 9);
            } else {
               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
               assert(inst->mlen <= 7 && inst->mlen % 2 == 1);
            }
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->exec_size == 8);
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->exec_size == 8);
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->exec_size == 8);
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case SHADER_OPCODE_TXF:
         assert(inst->mlen <= 9 && inst->mlen % 2 == 1);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case SHADER_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         unreachable("not reached");
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      dst = vec16(dst);
   }

   assert(devinfo->gen < 7 || inst->header_size == 0 ||
          src.file == BRW_GENERAL_REGISTER_FILE);

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present.  If there's a texture offset,
    * we need to set it up explicitly and load the offset bitfield.
    * Otherwise, we can use an implied move from g0 to the first message reg.
    */
   if (inst->header_size != 0) {
      if (devinfo->gen < 6 && !inst->offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      } else {
         struct brw_reg header_reg;

         if (devinfo->gen >= 7) {
            header_reg = src;
         } else {
            assert(inst->base_mrf != -1);
            header_reg = brw_message_reg(inst->base_mrf);
         }

         brw_push_insn_state(p);
         brw_set_default_exec_size(p, BRW_EXECUTE_8);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_MOV(p, header_reg, brw_vec8_grf(0, 0));

         if (inst->offset) {
            /* Set the offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header_reg, 2),
                    brw_imm_ud(inst->offset));
         } else if (stage != MESA_SHADER_VERTEX &&
                    stage != MESA_SHADER_FRAGMENT) {
            /* The vertex and fragment stages have g0.2 set to 0, so
             * header0.2 is 0 when g0 is copied.  Other stages may not, so we
             * must set it to 0 to avoid setting undesirable bits in the
             * message.
             */
            brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
         }

         brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
         brw_pop_insn_state(p);
      }
   }

   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
         ? prog_data->binding_table.gather_texture_start
         : prog_data->binding_table.texture_start;

   if (surface_index.file == BRW_IMMEDIATE_VALUE &&
       sampler_index.file == BRW_IMMEDIATE_VALUE) {
      uint32_t surface = surface_index.ud;
      uint32_t sampler = sampler_index.ud;

      brw_SAMPLE(p,
                 retype(dst, BRW_REGISTER_TYPE_UW),
                 inst->base_mrf,
                 src,
                 surface + base_binding_table_index,
                 sampler % 16,
                 msg_type,
                 inst->size_written / REG_SIZE,
                 inst->mlen,
                 inst->header_size != 0,
                 simd_mode,
                 return_format);

      brw_mark_surface_used(prog_data, surface + base_binding_table_index);
   } else {
      /* Non-const sampler index */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
      struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      if (brw_regs_equal(&surface_reg, &sampler_reg)) {
         brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
      } else {
         if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
            brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
         } else {
            brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
            brw_OR(p, addr, addr, surface_reg);
         }
      }
      if (base_binding_table_index)
         brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
      brw_AND(p, addr, addr, brw_imm_ud(0xfff));

      brw_pop_insn_state(p);

      /* dst = send(offset, a0.0 | <descriptor>) */
      brw_inst *insn = brw_send_indirect_message(
         p, BRW_SFID_SAMPLER, dst, src, addr);
      brw_set_sampler_message(p, insn,
                              0 /* surface */,
                              0 /* sampler */,
                              msg_type,
                              inst->size_written / REG_SIZE,
                              inst->mlen /* mlen */,
                              inst->header_size != 0 /* header */,
                              simd_mode,
                              return_format);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }

   if (is_combined_send) {
      brw_inst_set_eot(p->devinfo, brw_last_inst, true);
      brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
   }
}

/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * Ideally, we want to produce:
 *
 *             DDX                    DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But the ideal approximation may impose a huge performance cost on
 * sample_d.  On at least Haswell, the sample_d instruction does some
 * optimizations if the same LOD is used for all pixels in the subspan.
 *
 * For DDY, we need to use ALIGN16 mode since it's capable of doing the
 * appropriate swizzling.
 */
void
fs_generator::generate_ddx(enum opcode opcode,
                           struct brw_reg dst, struct brw_reg src)
{
   unsigned vstride, width;

   if (opcode == FS_OPCODE_DDX_FINE) {
      /* produce accurate derivatives */
      vstride = BRW_VERTICAL_STRIDE_2;
      width = BRW_WIDTH_2;
   } else {
      /* replicate the derivative at the top-left pixel to other pixels */
      vstride = BRW_VERTICAL_STRIDE_4;
      width = BRW_WIDTH_4;
   }

   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 src.negate, src.abs,
                                 BRW_REGISTER_TYPE_F,
                                 vstride,
                                 width,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 src.negate, src.abs,
                                 BRW_REGISTER_TYPE_F,
                                 vstride,
                                 width,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/* The DDY derivative computation is negated, since FBOs place the origin at
 * the upper left instead of the lower left.
 */
void
fs_generator::generate_ddy(enum opcode opcode,
                           struct brw_reg dst, struct brw_reg src)
{
   if (opcode == FS_OPCODE_DDY_FINE) {
      /* produce accurate derivatives */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    src.negate, src.abs,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                    src.negate, src.abs,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      brw_ADD(p, dst, negate(src0), src1);
      brw_pop_insn_state(p);
   } else {
      /* replicate the derivative at the top-left pixel to other pixels */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    src.negate, src.abs,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                    src.negate, src.abs,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      brw_ADD(p, dst, negate(src0), src1);
   }
}

void
fs_generator::generate_discard_jump(fs_inst *inst)
{
   assert(devinfo->gen >= 6);

   /* This HALT will be patched up at FB write time to point UIP at the end of
    * the program, and at brw_uip_jip() JIP will be set to the end of the
    * current block (or the program).
    */
   this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
   gen6_HALT(p);
}

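/**
 * Write a contiguous block of GRFs to scratch space, splitting the write
 * into 16-wide chunks unless force_writemask_all allows the full 32-wide
 * message to be used.
 */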
void
fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
{
   /* The 32-wide messages only respect the first 16-wide half of the channel
    * enable signals which are replicated identically for the second group of
    * 16 channels, so we cannot use them unless the write is marked
    * force_writemask_all.
    */
   const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
                               MIN2(16, inst->exec_size);
   const unsigned block_size = 4 * lower_size / REG_SIZE;
   assert(inst->mlen != 0);

   brw_push_insn_state(p);
   brw_set_default_exec_size(p, cvt(lower_size) - 1);
   brw_set_default_compression(p, lower_size > 8);

   for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
      brw_set_default_group(p, inst->group + lower_size * i);

      brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
              retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));

      brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),
                                    block_size,
                                    inst->offset + block_size * REG_SIZE * i);
   }

   brw_pop_insn_state(p);
}

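/**
 * Read a contiguous block of GRFs back from scratch space with an oword
 * block read through the message registers.
 */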
void
fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->exec_size <= 16 || inst->force_writemask_all);
   assert(inst->mlen != 0);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf),
                                inst->exec_size / 8, inst->offset);
}

void
fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->exec_size <= 16 || inst->force_writemask_all);

   gen7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
}

void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(type_sz(dst.type) == 4);
   assert(inst->mlen != 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.ud;

   assert(offset.file == BRW_IMMEDIATE_VALUE &&
          offset.type == BRW_REGISTER_TYPE_UD);
   uint32_t read_offset = offset.ud;

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        read_offset, surf_index);
}

Eric Anholtd8214e42012-11-07 11:18:34 -08001149void
Eric Anholt461a2972012-12-05 00:06:30 -08001150fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
1151 struct brw_reg dst,
1152 struct brw_reg index,
Francisco Jerezad38ba12016-10-26 14:25:06 -07001153 struct brw_reg payload)
Eric Anholt461a2972012-12-05 00:06:30 -08001154{
Chris Forbes3fd359b2014-08-02 14:27:21 +12001155 assert(index.type == BRW_REGISTER_TYPE_UD);
Francisco Jerezad38ba12016-10-26 14:25:06 -07001156 assert(payload.file == BRW_GENERAL_REGISTER_FILE);
Francisco Jerez9b22a0d2016-12-08 20:05:18 -08001157 assert(type_sz(dst.type) == 4);
Kristian Høgsberg0ac4c272014-12-10 14:59:26 -08001158
Chris Forbes3fd359b2014-08-02 14:27:21 +12001159 if (index.file == BRW_IMMEDIATE_VALUE) {
Francisco Jerezad38ba12016-10-26 14:25:06 -07001160 const uint32_t surf_index = index.ud;
Chris Forbes3fd359b2014-08-02 14:27:21 +12001161
1162 brw_push_insn_state(p);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001163 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
1164 brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
1165 brw_pop_insn_state(p);
1166
Francisco Jerez9b22a0d2016-12-08 20:05:18 -08001167 brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
1168 brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
1169 brw_set_dp_read_message(p, send, surf_index,
1170 BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
Francisco Jerezad38ba12016-10-26 14:25:06 -07001171 GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
1172 GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1173 1, /* mlen */
1174 true, /* header */
Francisco Jerez9b22a0d2016-12-08 20:05:18 -08001175 DIV_ROUND_UP(inst->size_written, REG_SIZE));
Chris Forbes3fd359b2014-08-02 14:27:21 +12001176
Francisco Jerezad38ba12016-10-26 14:25:06 -07001177 } else {
Chris Forbes3fd359b2014-08-02 14:27:21 +12001178 struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
1179
1180 brw_push_insn_state(p);
1181 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001182
1183 /* a0.0 = surf_index & 0xff */
1184 brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
Jason Ekstrand4e9c79c2015-04-14 18:00:06 -07001185 brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001186 brw_set_dest(p, insn_and, addr);
1187 brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
1188 brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
1189
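      /* The surface index is not known at compile time here, so the message
       * descriptor cannot be encoded entirely as an immediate: the AND above
       * masks the index down to 8 bits in a0.0, and the indirect SEND OR's
       * that register into the rest of the descriptor (see the comment
       * below).
       */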
Francisco Jereza902a5d2015-03-19 15:44:24 +02001190 /* dst = send(payload, a0.0 | <descriptor>) */
1191 brw_inst *insn = brw_send_indirect_message(
Francisco Jerezad38ba12016-10-26 14:25:06 -07001192 p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
Francisco Jerez9b22a0d2016-12-08 20:05:18 -08001193 retype(dst, BRW_REGISTER_TYPE_UD),
1194 retype(payload, BRW_REGISTER_TYPE_UD), addr);
1195 brw_set_dp_read_message(p, insn, 0 /* surface */,
1196 BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
Francisco Jerezad38ba12016-10-26 14:25:06 -07001197 GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
1198 GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1199 1, /* mlen */
1200 true, /* header */
Francisco Jerez9b22a0d2016-12-08 20:05:18 -08001201 DIV_ROUND_UP(inst->size_written, REG_SIZE));
Chris Forbes3fd359b2014-08-02 14:27:21 +12001202
1203 brw_pop_insn_state(p);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001204 }
Eric Anholt461a2972012-12-05 00:06:30 -08001205}
1206
1207void
Francisco Jerezed4d0e42016-05-20 13:03:31 -07001208fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
1209 struct brw_reg dst,
1210 struct brw_reg index)
Eric Anholtd8214e42012-11-07 11:18:34 -08001211{
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001212 assert(devinfo->gen < 7); /* Should use the gen7 variant. */
Jason Ekstrand76c10862015-03-24 10:17:32 -07001213 assert(inst->header_size != 0);
Eric Anholt70b27e02013-03-18 10:16:42 -07001214 assert(inst->mlen);
Eric Anholtd8214e42012-11-07 11:18:34 -08001215
1216 assert(index.file == BRW_IMMEDIATE_VALUE &&
1217 index.type == BRW_REGISTER_TYPE_UD);
Matt Turnere42fb0c2015-10-22 19:41:30 -07001218 uint32_t surf_index = index.ud;
Eric Anholtd8214e42012-11-07 11:18:34 -08001219
Eric Anholt70b27e02013-03-18 10:16:42 -07001220 uint32_t simd_mode, rlen, msg_type;
Francisco Jerez448340d2016-05-20 00:13:33 -07001221 if (inst->exec_size == 16) {
Eric Anholt70b27e02013-03-18 10:16:42 -07001222 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
1223 rlen = 8;
Eric Anholtd8214e42012-11-07 11:18:34 -08001224 } else {
Francisco Jerez448340d2016-05-20 00:13:33 -07001225 assert(inst->exec_size == 8);
Eric Anholt70b27e02013-03-18 10:16:42 -07001226 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
1227 rlen = 4;
Eric Anholtd8214e42012-11-07 11:18:34 -08001228 }
1229
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001230 if (devinfo->gen >= 5)
Eric Anholt70b27e02013-03-18 10:16:42 -07001231 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
1232 else {
1233 /* We always use the SIMD16 message so that we only have to load U, and
1234 * not V or R.
1235 */
1236 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
1237 assert(inst->mlen == 3);
Francisco Jerez69570bb2016-09-07 13:38:20 -07001238 assert(inst->size_written == 8 * REG_SIZE);
Eric Anholt70b27e02013-03-18 10:16:42 -07001239 rlen = 8;
1240 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
1241 }
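   /* A sketch of the message sizing implied by the assertions above (an
    * inference from this code, not a spec quote): mlen == 3 is one header
    * register plus two registers holding the 16 U coordinates, and rlen ==
    * 8 is the four result components times two registers each in SIMD16.
    */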
1242
Eric Anholtd8214e42012-11-07 11:18:34 -08001243 struct brw_reg header = brw_vec8_grf(0, 0);
1244 gen6_resolve_implied_move(p, &header, inst->base_mrf);
1245
Matt Turner7c796082014-06-13 14:29:25 -07001246 brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
Francisco Jerezc19c3d32016-05-18 15:29:27 -07001247 brw_inst_set_compression(devinfo, send, false);
Kenneth Graunke71846a92014-04-16 20:15:23 -07001248 brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
Eric Anholtd8214e42012-11-07 11:18:34 -08001249 brw_set_src0(p, send, header);
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001250 if (devinfo->gen < 6)
Jason Ekstrand4e9c79c2015-04-14 18:00:06 -07001251 brw_inst_set_base_mrf(p->devinfo, send, inst->base_mrf);
Eric Anholt70b27e02013-03-18 10:16:42 -07001252
1253 /* Our surface is set up as floats, regardless of what actual data is
1254 * stored in it.
1255 */
1256 uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
1257 brw_set_sampler_message(p, send,
Eric Anholtd8214e42012-11-07 11:18:34 -08001258 surf_index,
Eric Anholt70b27e02013-03-18 10:16:42 -07001259 0, /* sampler (unused) */
Eric Anholtd8214e42012-11-07 11:18:34 -08001260 msg_type,
Eric Anholt70b27e02013-03-18 10:16:42 -07001261 rlen,
Eric Anholtd8214e42012-11-07 11:18:34 -08001262 inst->mlen,
Jason Ekstrand76c10862015-03-24 10:17:32 -07001263 inst->header_size != 0,
Eric Anholt70b27e02013-03-18 10:16:42 -07001264 simd_mode,
1265 return_format);
Eric Anholtd8214e42012-11-07 11:18:34 -08001266}
1267
1268void
1269fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
1270 struct brw_reg dst,
1271 struct brw_reg index,
1272 struct brw_reg offset)
1273{
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001274 assert(devinfo->gen >= 7);
Eric Anholtd8214e42012-11-07 11:18:34 -08001275 /* Varying-offset pull constant loads are treated as normal expressions on
1276 * gen7, so the fact that they are send messages is hidden at the IR level.
1277 */
Jason Ekstrand76c10862015-03-24 10:17:32 -07001278 assert(inst->header_size == 0);
Eric Anholtd8214e42012-11-07 11:18:34 -08001279 assert(!inst->mlen);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001280 assert(index.type == BRW_REGISTER_TYPE_UD);
Eric Anholtd8214e42012-11-07 11:18:34 -08001281
Eric Anholtdca5fc12013-03-13 14:48:55 -07001282 uint32_t simd_mode, rlen, mlen;
Francisco Jerez448340d2016-05-20 00:13:33 -07001283 if (inst->exec_size == 16) {
Eric Anholtdca5fc12013-03-13 14:48:55 -07001284 mlen = 2;
1285 rlen = 8;
1286 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
Eric Anholtd8214e42012-11-07 11:18:34 -08001287 } else {
Francisco Jerez448340d2016-05-20 00:13:33 -07001288 assert(inst->exec_size == 8);
Eric Anholtdca5fc12013-03-13 14:48:55 -07001289 mlen = 1;
1290 rlen = 4;
1291 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
Eric Anholtd8214e42012-11-07 11:18:34 -08001292 }
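   /* A reading of the constants above (not a spec quote): the offset
    * payload takes one register of dword offsets per eight channels, and
    * the LD message returns four components at one register per component
    * per eight-channel group, hence 4 or 8 destination registers.
    */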
1293
Chris Forbes3fd359b2014-08-02 14:27:21 +12001294 if (index.file == BRW_IMMEDIATE_VALUE) {
Kenneth Graunke6d89bc82013-08-14 19:49:33 -07001295
Matt Turnere42fb0c2015-10-22 19:41:30 -07001296 uint32_t surf_index = index.ud;
Chris Forbes3fd359b2014-08-02 14:27:21 +12001297
1298 brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
Jason Ekstrand1385a4b2014-09-08 15:26:24 -07001299 brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
Chris Forbes3fd359b2014-08-02 14:27:21 +12001300 brw_set_src0(p, send, offset);
1301 brw_set_sampler_message(p, send,
1302 surf_index,
1303 0, /* LD message ignores sampler unit */
1304 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1305 rlen,
1306 mlen,
1307 false, /* no header */
1308 simd_mode,
1309 0);
1310
Chris Forbes3fd359b2014-08-02 14:27:21 +12001311 } else {
1312
1313 struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
1314
1315 brw_push_insn_state(p);
1316 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001317
1318 /* a0.0 = surf_index & 0xff */
1319 brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
Jason Ekstrand4e9c79c2015-04-14 18:00:06 -07001320 brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001321 brw_set_dest(p, insn_and, addr);
1322 brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
1323 brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
1324
Francisco Jereza815cd82015-02-26 17:24:03 +02001325 brw_pop_insn_state(p);
1326
Francisco Jereza902a5d2015-03-19 15:44:24 +02001327 /* dst = send(offset, a0.0 | <descriptor>) */
1328 brw_inst *insn = brw_send_indirect_message(
1329 p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW),
1330 offset, addr);
1331 brw_set_sampler_message(p, insn,
Chris Forbes3fd359b2014-08-02 14:27:21 +12001332 0 /* surface */,
1333 0 /* sampler */,
1334 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1335 rlen /* rlen */,
1336 mlen /* mlen */,
1337 false /* header */,
1338 simd_mode,
1339 0);
Chris Forbes3fd359b2014-08-02 14:27:21 +12001340 }
Eric Anholtd8214e42012-11-07 11:18:34 -08001341}
Paul Berry3f929ef2012-06-18 14:50:04 -07001342
1343/**
1344 * Cause the current pixel/sample mask to be transferred into the flags
1345 * register (f0.0).
1346 *
1347 * Reads R1.7 bits 15:0 on Gen6 and above, R0.0 on earlier generations.
1348 */
1349void
Eric Anholtb278f652012-12-06 10:36:11 -08001350fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
Paul Berry3f929ef2012-06-18 14:50:04 -07001351{
Eric Anholtb278f652012-12-06 10:36:11 -08001352 struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
Eric Anholtd5016492012-12-06 12:15:13 -08001353 struct brw_reg dispatch_mask;
Paul Berry3f929ef2012-06-18 14:50:04 -07001354
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001355 if (devinfo->gen >= 6)
Eric Anholtd5016492012-12-06 12:15:13 -08001356 dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
1357 else
1358 dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1359
Paul Berry3f929ef2012-06-18 14:50:04 -07001360 brw_push_insn_state(p);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001361 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
Eric Anholtd5016492012-12-06 12:15:13 -08001362 brw_MOV(p, flags, dispatch_mask);
Paul Berry3f929ef2012-06-18 14:50:04 -07001363 brw_pop_insn_state(p);
1364}
1365
Chris Forbes6e91f2d2013-11-18 21:13:13 +13001366void
1367fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
1368 struct brw_reg dst,
1369 struct brw_reg src,
1370 struct brw_reg msg_data,
1371 unsigned msg_type)
1372{
Francisco Jerez69570bb2016-09-07 13:38:20 -07001373 assert(inst->size_written % REG_SIZE == 0);
Neil Robertsda361ac2015-07-17 14:40:03 +01001374 assert(msg_data.type == BRW_REGISTER_TYPE_UD);
Chris Forbes6e91f2d2013-11-18 21:13:13 +13001375
1376 brw_pixel_interpolator_query(p,
1377 retype(dst, BRW_REGISTER_TYPE_UW),
1378 src,
1379 inst->pi_noperspective,
1380 msg_type,
Neil Robertsda361ac2015-07-17 14:40:03 +01001381 msg_data,
Chris Forbes6e91f2d2013-11-18 21:13:13 +13001382 inst->mlen,
Francisco Jerez69570bb2016-09-07 13:38:20 -07001383 inst->size_written / REG_SIZE);
Chris Forbes6e91f2d2013-11-18 21:13:13 +13001384}
1385
Anuj Phogate12bbb52013-10-24 16:17:08 -07001386/* Sets vstride=1, width=4, hstride=0 on register src1 for
1387 * the ADD instruction.
1388 */
1389void
1390fs_generator::generate_set_sample_id(fs_inst *inst,
1391 struct brw_reg dst,
1392 struct brw_reg src0,
1393 struct brw_reg src1)
1394{
1395 assert(dst.type == BRW_REGISTER_TYPE_D ||
1396 dst.type == BRW_REGISTER_TYPE_UD);
1397 assert(src0.type == BRW_REGISTER_TYPE_D ||
1398 src0.type == BRW_REGISTER_TYPE_UD);
1399
Matt Turnere10fc052015-10-20 17:51:12 -07001400 struct brw_reg reg = stride(src1, 1, 4, 0);
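   /* An illustrative reading of the <1;4,0> region above: width 4 with
    * hstride 0 makes four consecutive channels read the same element, and
    * vstride 1 advances by one element per row of four, so with the usual
    * 2x2 subspan grouping each subspan picks up its own sample id from
    * successive elements of src1.
    */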
Francisco Jerez448340d2016-05-20 00:13:33 -07001401 if (devinfo->gen >= 8 || inst->exec_size == 8) {
Jason Ekstrandf91b5662014-08-13 12:23:47 -07001402 brw_ADD(p, dst, src0, reg);
Francisco Jerez448340d2016-05-20 00:13:33 -07001403 } else if (inst->exec_size == 16) {
Matt Turner0f747962015-10-20 18:29:42 -07001404 brw_push_insn_state(p);
1405 brw_set_default_exec_size(p, BRW_EXECUTE_8);
1406 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
Jason Ekstrandf91b5662014-08-13 12:23:47 -07001407 brw_ADD(p, firsthalf(dst), firsthalf(src0), reg);
Matt Turner0f747962015-10-20 18:29:42 -07001408 brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
Jason Ekstrandf91b5662014-08-13 12:23:47 -07001409 brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2));
Matt Turner0f747962015-10-20 18:29:42 -07001410 brw_pop_insn_state(p);
Jason Ekstrandf91b5662014-08-13 12:23:47 -07001411 }
Anuj Phogate12bbb52013-10-24 16:17:08 -07001412}
1413
Chad Versace20dfa502013-01-09 11:46:42 -08001414void
1415fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
1416 struct brw_reg dst,
1417 struct brw_reg x,
1418 struct brw_reg y)
1419{
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001420 assert(devinfo->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001421 assert(dst.type == BRW_REGISTER_TYPE_UD);
Vinson Lee15599942013-01-26 08:27:50 +01001422 assert(x.type == BRW_REGISTER_TYPE_F);
1423 assert(y.type == BRW_REGISTER_TYPE_F);
Chad Versace20dfa502013-01-09 11:46:42 -08001424
1425 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
1426 *
1427 * Because this instruction does not have a 16-bit floating-point type,
1428 * the destination data type must be Word (W).
1429 *
1430 * The destination must be DWord-aligned and specify a horizontal stride
1431 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
1432 * each destination channel and the upper word is not modified.
1433 */
Francisco Jerez509f5872015-02-04 17:58:49 +02001434 struct brw_reg dst_w = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
Chad Versace20dfa502013-01-09 11:46:42 -08001435
Francisco Jerez509f5872015-02-04 17:58:49 +02001436 /* Give each 32-bit channel of dst the form below, where "." means
Chad Versace20dfa502013-01-09 11:46:42 -08001437 * unchanged.
1438 * 0x....hhhh
1439 */
1440 brw_F32TO16(p, dst_w, y);
1441
1442 /* Now the form:
1443 * 0xhhhh0000
1444 */
1445 brw_SHL(p, dst, dst, brw_imm_ud(16u));
1446
1447 /* And, finally the form of packHalf2x16's output:
1448 * 0xhhhhllll
1449 */
1450 brw_F32TO16(p, dst_w, x);
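   /* Worked example (values chosen for illustration): with x == 1.0f
    * (half bits 0x3c00) and y == -2.0f (half bits 0xc000), the three steps
    * above leave 0x....c000, then 0xc0000000, then 0xc0003c00 in each
    * enabled channel of dst.
    */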
1451}
1452
1453void
1454fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
1455 struct brw_reg dst,
1456 struct brw_reg src)
1457{
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001458 assert(devinfo->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001459 assert(dst.type == BRW_REGISTER_TYPE_F);
1460 assert(src.type == BRW_REGISTER_TYPE_UD);
1461
1462 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1463 *
1464 * Because this instruction does not have a 16-bit floating-point type,
1465 * the source data type must be Word (W). The destination type must be
1466 * F (Float).
1467 */
Francisco Jerez509f5872015-02-04 17:58:49 +02001468 struct brw_reg src_w = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
Chad Versace20dfa502013-01-09 11:46:42 -08001469
1470 /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
1471 * For the Y case, we wish to access only the upper word; therefore
1472 * a 16-bit subregister offset is needed.
1473 */
1474 assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
1475 inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
1476 if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
Chad Versace09740312013-01-24 21:48:40 -08001477 src_w.subnr += 2;
Chad Versace20dfa502013-01-09 11:46:42 -08001478
1479 brw_F16TO32(p, dst, src_w);
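   /* Worked example (illustration only): with src == 0xc0003c00 in a
    * channel, the X variant converts the low word 0x3c00 to 1.0f, while
    * the Y variant, thanks to the 2-byte subregister offset above, converts
    * the high word 0xc000 to -2.0f.
    */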
1480}
1481
Eric Anholt11dd9e92011-05-24 16:34:27 -07001482void
Eric Anholt5c5218e2013-03-19 15:28:11 -07001483fs_generator::generate_shader_time_add(fs_inst *inst,
1484 struct brw_reg payload,
1485 struct brw_reg offset,
1486 struct brw_reg value)
1487{
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001488 assert(devinfo->gen >= 7);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001489 brw_push_insn_state(p);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001490 brw_set_default_mask_control(p, true);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001491
1492 assert(payload.file == BRW_GENERAL_REGISTER_FILE);
1493 struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
1494 offset.type);
1495 struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
1496 value.type);
1497
1498 assert(offset.file == BRW_IMMEDIATE_VALUE);
1499 if (value.file == BRW_GENERAL_REGISTER_FILE) {
1500 value.width = BRW_WIDTH_1;
1501 value.hstride = BRW_HORIZONTAL_STRIDE_0;
1502 value.vstride = BRW_VERTICAL_STRIDE_0;
1503 } else {
1504 assert(value.file == BRW_IMMEDIATE_VALUE);
1505 }
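   /* When value is a GRF, the overrides above turn it into a <0;1,0>
    * scalar region, so the payload_value MOV below reads a single channel
    * of the source rather than a per-channel vector.
    */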
1506
1507 /* Trying to deal with setup of the params from the IR is crazy in the FS8
1508 * case, and we don't really care about squeezing every bit of performance
1509 * out of this path, so we just emit the MOVs from here.
1510 */
1511 brw_MOV(p, payload_offset, offset);
1512 brw_MOV(p, payload_value, value);
Eric Anholt3c9dc2d2013-10-02 14:07:40 -07001513 brw_shader_time_add(p, payload,
Jordan Justenc43ae402014-08-29 12:50:46 -07001514 prog_data->binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001515 brw_pop_insn_state(p);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -07001516
Jordan Justenc43ae402014-08-29 12:50:46 -07001517 brw_mark_surface_used(prog_data,
1518 prog_data->binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001519}
1520
1521void
Kristian Høgsberg9a1af7b2014-10-27 19:40:47 -07001522fs_generator::enable_debug(const char *shader_name)
1523{
1524 debug_flag = true;
1525 this->shader_name = shader_name;
1526}
1527
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08001528int
1529fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
Eric Anholt11dd9e92011-05-24 16:34:27 -07001530{
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08001531 /* Align to a 64-byte boundary. */
1532 while (p->next_insn_offset % 64)
1533 brw_NOP(p);
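   /* (A NOP is a full-size 16-byte instruction, so this pads the start of
    * the kernel out to the next 64-byte boundary.)
    */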
1534
1535 this->dispatch_width = dispatch_width;
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08001536
Matt Turner92b05562014-05-25 10:42:32 -07001537 int start_offset = p->next_insn_offset;
Matt Turnerb0d422c2015-03-16 12:18:31 -07001538 int spill_count = 0, fill_count = 0;
Abdiel Janulguef3401452014-08-06 11:27:58 +03001539 int loop_count = 0;
Matt Turner92b05562014-05-25 10:42:32 -07001540
1541 struct annotation_info annotation;
1542 memset(&annotation, 0, sizeof(annotation));
1543
Matt Turnera3d0ccb2014-07-11 21:16:13 -07001544 foreach_block_and_inst (block, fs_inst, inst, cfg) {
Eric Anholt11dd9e92011-05-24 16:34:27 -07001545 struct brw_reg src[3], dst;
Kenneth Graunke776ad512014-05-30 16:41:32 -07001546 unsigned int last_insn_offset = p->next_insn_offset;
Matt Turner7452f182014-12-30 12:56:13 -08001547 bool multiple_instructions_emitted = false;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001548
Matt Turnerf01d92f2016-05-02 23:32:13 -07001549 /* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
1550 * "Register Region Restrictions" section: for BDW, SKL:
1551 *
1552 * "A POW/FDIV operation must not be followed by an instruction
1553 * that requires two destination registers."
1554 *
1555 * The documentation is often lacking annotations for Atom parts,
1556 * and empirically this affects CHV as well.
1557 */
1558 if (devinfo->gen >= 8 &&
1559 p->nr_insn > 1 &&
1560 brw_inst_opcode(devinfo, brw_last_inst) == BRW_OPCODE_MATH &&
1561 brw_inst_math_function(devinfo, brw_last_inst) == BRW_MATH_FUNCTION_POW &&
1562 inst->dst.component_size(inst->exec_size) > REG_SIZE) {
1563 brw_NOP(p);
1564 last_insn_offset = p->next_insn_offset;
1565 }
1566
Matt Turnerf0f7fb12014-05-19 10:20:37 -07001567 if (unlikely(debug_flag))
Jason Ekstrand9c89e472015-04-15 15:01:25 -07001568 annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001569
Francisco Jerez3340a662016-05-18 18:48:04 -07001570 /* If the instruction writes to more than one register, it needs to be
1571 * explicitly marked as compressed on Gen <= 5. On Gen >= 6 the
1572 * hardware figures out by itself what the right compression mode is,
1573 * but we still need to know whether the instruction is compressed to
1574 * set up the source register regions appropriately.
1575 *
1576 * XXX - This is wrong for instructions that write a single register but
1577 * read more than one which should strictly speaking be treated as
1578 * compressed. For instructions that don't write any registers it
1579 * relies on the destination being a null register of the correct
1580 * type and regioning so the instruction is considered compressed
1581 * or not accordingly.
1582 */
Francisco Jerez7f28ad82016-05-20 15:25:28 -07001583 const bool compressed =
1584 inst->dst.component_size(inst->exec_size) > REG_SIZE;
1585 brw_set_default_compression(p, compressed);
Francisco Jerezece41df2016-05-20 16:14:13 -07001586 brw_set_default_group(p, inst->group);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001587
Kenneth Graunke062ad812016-05-16 15:09:17 -07001588 for (unsigned int i = 0; i < inst->sources; i++) {
Kenneth Graunkedabaf4f2016-05-18 19:02:45 -07001589 src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
Francisco Jerez7f28ad82016-05-20 15:25:28 -07001590 compressed);
Kenneth Graunke062ad812016-05-16 15:09:17 -07001591
1592 /* The accumulator result appears to get used for the
1593 * conditional modifier generation. When negating a UD
1594 * value, there is a 33rd bit generated for the sign in the
1595 * accumulator value, so now you can't check, for example,
1596 * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
1597 */
1598 assert(!inst->conditional_mod ||
1599 inst->src[i].type != BRW_REGISTER_TYPE_UD ||
1600 !inst->src[i].negate);
1601 }
Francisco Jerez7f28ad82016-05-20 15:25:28 -07001602 dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen, compressed);
Kenneth Graunke062ad812016-05-16 15:09:17 -07001603
Francisco Jerez117a9a02016-05-18 18:41:28 -07001604 brw_set_default_access_mode(p, BRW_ALIGN_1);
Kenneth Graunke062ad812016-05-16 15:09:17 -07001605 brw_set_default_predicate_control(p, inst->predicate);
1606 brw_set_default_predicate_inverse(p, inst->predicate_inverse);
1607 brw_set_default_flag_reg(p, 0, inst->flag_subreg);
1608 brw_set_default_saturate(p, inst->saturate);
1609 brw_set_default_mask_control(p, inst->force_writemask_all);
1610 brw_set_default_acc_write_control(p, inst->writes_accumulator);
1611 brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
1612
Samuel Iglesias Gonsálvez9979a3f2016-06-20 12:13:14 +02001613 assert(inst->force_writemask_all || inst->exec_size >= 4);
Francisco Jerezece41df2016-05-20 16:14:13 -07001614 assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
Kenneth Graunke062ad812016-05-16 15:09:17 -07001615 assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
1616 assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
1617
Eric Anholt11dd9e92011-05-24 16:34:27 -07001618 switch (inst->opcode) {
1619 case BRW_OPCODE_MOV:
1620 brw_MOV(p, dst, src[0]);
1621 break;
1622 case BRW_OPCODE_ADD:
1623 brw_ADD(p, dst, src[0], src[1]);
1624 break;
1625 case BRW_OPCODE_MUL:
1626 brw_MUL(p, dst, src[0], src[1]);
1627 break;
Topi Pohjolainen8f3e5362013-12-17 16:39:16 +02001628 case BRW_OPCODE_AVG:
1629 brw_AVG(p, dst, src[0], src[1]);
1630 break;
Eric Anholt3f78f712011-08-15 22:36:18 -07001631 case BRW_OPCODE_MACH:
Eric Anholt3f78f712011-08-15 22:36:18 -07001632 brw_MACH(p, dst, src[0], src[1]);
Eric Anholt3f78f712011-08-15 22:36:18 -07001633 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001634
Matt Turner6be863a2014-04-01 17:25:12 -07001635 case BRW_OPCODE_LINE:
1636 brw_LINE(p, dst, src[0], src[1]);
1637 break;
1638
Eric Anholt7d55f372012-02-07 00:59:11 +01001639 case BRW_OPCODE_MAD:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001640 assert(devinfo->gen >= 6);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001641 brw_set_default_access_mode(p, BRW_ALIGN_16);
Francisco Jerezf14b9ea2016-05-17 19:51:50 -07001642 brw_MAD(p, dst, src[0], src[1], src[2]);
Eric Anholt7d55f372012-02-07 00:59:11 +01001643 break;
1644
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001645 case BRW_OPCODE_LRP:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001646 assert(devinfo->gen >= 6);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001647 brw_set_default_access_mode(p, BRW_ALIGN_16);
Francisco Jerezf14b9ea2016-05-17 19:51:50 -07001648 brw_LRP(p, dst, src[0], src[1], src[2]);
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001649 break;
1650
Eric Anholt11dd9e92011-05-24 16:34:27 -07001651 case BRW_OPCODE_FRC:
1652 brw_FRC(p, dst, src[0]);
1653 break;
1654 case BRW_OPCODE_RNDD:
1655 brw_RNDD(p, dst, src[0]);
1656 break;
1657 case BRW_OPCODE_RNDE:
1658 brw_RNDE(p, dst, src[0]);
1659 break;
1660 case BRW_OPCODE_RNDZ:
1661 brw_RNDZ(p, dst, src[0]);
1662 break;
1663
1664 case BRW_OPCODE_AND:
1665 brw_AND(p, dst, src[0], src[1]);
1666 break;
1667 case BRW_OPCODE_OR:
1668 brw_OR(p, dst, src[0], src[1]);
1669 break;
1670 case BRW_OPCODE_XOR:
1671 brw_XOR(p, dst, src[0], src[1]);
1672 break;
1673 case BRW_OPCODE_NOT:
1674 brw_NOT(p, dst, src[0]);
1675 break;
1676 case BRW_OPCODE_ASR:
1677 brw_ASR(p, dst, src[0], src[1]);
1678 break;
1679 case BRW_OPCODE_SHR:
1680 brw_SHR(p, dst, src[0], src[1]);
1681 break;
1682 case BRW_OPCODE_SHL:
1683 brw_SHL(p, dst, src[0], src[1]);
1684 break;
Chad Versace20dfa502013-01-09 11:46:42 -08001685 case BRW_OPCODE_F32TO16:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001686 assert(devinfo->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001687 brw_F32TO16(p, dst, src[0]);
1688 break;
1689 case BRW_OPCODE_F16TO32:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001690 assert(devinfo->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001691 brw_F16TO32(p, dst, src[0]);
1692 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001693 case BRW_OPCODE_CMP:
Francisco Jerez95272f52016-05-17 19:59:18 -07001694 if (inst->exec_size >= 16 && devinfo->gen == 7 && !devinfo->is_haswell &&
1695 dst.file == BRW_ARCHITECTURE_REGISTER_FILE) {
1696 /* For unknown reasons the WaCMPInstFlagDepClearedEarly workaround
1697 * implemented in the compiler is not sufficient. Overriding the
1698 * type when the destination is the null register is necessary but
1699 * not sufficient by itself.
1700 */
1701 assert(dst.nr == BRW_ARF_NULL);
1702 dst.type = BRW_REGISTER_TYPE_D;
Matt Turner7e607942015-02-03 17:38:49 -08001703 }
Francisco Jerez95272f52016-05-17 19:59:18 -07001704 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001705 break;
1706 case BRW_OPCODE_SEL:
1707 brw_SEL(p, dst, src[0], src[1]);
1708 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001709 case BRW_OPCODE_BFREV:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001710 assert(devinfo->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001711 /* BFREV only supports UD type for src and dst. */
1712 brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
1713 retype(src[0], BRW_REGISTER_TYPE_UD));
1714 break;
1715 case BRW_OPCODE_FBH:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001716 assert(devinfo->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001717 /* FBH only supports UD type for dst. */
1718 brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1719 break;
1720 case BRW_OPCODE_FBL:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001721 assert(devinfo->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001722 /* FBL only supports UD type for dst. */
1723 brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1724 break;
Ian Romanickde200862016-06-21 15:14:03 -07001725 case BRW_OPCODE_LZD:
1726 brw_LZD(p, dst, src[0]);
1727 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001728 case BRW_OPCODE_CBIT:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001729 assert(devinfo->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001730 /* CBIT only supports UD type for dst. */
1731 brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1732 break;
Matt Turner014cce32013-09-19 13:01:08 -07001733 case BRW_OPCODE_ADDC:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001734 assert(devinfo->gen >= 7);
Matt Turner014cce32013-09-19 13:01:08 -07001735 brw_ADDC(p, dst, src[0], src[1]);
Matt Turner014cce32013-09-19 13:01:08 -07001736 break;
1737 case BRW_OPCODE_SUBB:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001738 assert(devinfo->gen >= 7);
Matt Turner014cce32013-09-19 13:01:08 -07001739 brw_SUBB(p, dst, src[0], src[1]);
Matt Turner014cce32013-09-19 13:01:08 -07001740 break;
Juha-Pekka Heikkilada0c3b02014-03-28 15:28:32 +02001741 case BRW_OPCODE_MAC:
1742 brw_MAC(p, dst, src[0], src[1]);
1743 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001744
1745 case BRW_OPCODE_BFE:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001746 assert(devinfo->gen >= 7);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001747 brw_set_default_access_mode(p, BRW_ALIGN_16);
Francisco Jerezf14b9ea2016-05-17 19:51:50 -07001748 brw_BFE(p, dst, src[0], src[1], src[2]);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001749 break;
1750
1751 case BRW_OPCODE_BFI1:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001752 assert(devinfo->gen >= 7);
Francisco Jerez982c48d2016-05-17 20:02:29 -07001753 brw_BFI1(p, dst, src[0], src[1]);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001754 break;
1755 case BRW_OPCODE_BFI2:
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001756 assert(devinfo->gen >= 7);
Kenneth Graunkee3748092014-05-31 16:57:02 -07001757 brw_set_default_access_mode(p, BRW_ALIGN_16);
Francisco Jerez982c48d2016-05-17 20:02:29 -07001758 brw_BFI2(p, dst, src[0], src[1], src[2]);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001759 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001760
1761 case BRW_OPCODE_IF:
1762 if (inst->src[0].file != BAD_FILE) {
1763 /* The instruction has an embedded compare (only allowed on gen6) */
Jason Ekstrand5bda1ff2015-04-14 17:45:40 -07001764 assert(devinfo->gen == 6);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001765 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
1766 } else {
Francisco Jerez8ef56372016-05-18 19:36:03 -07001767 brw_IF(p, brw_inst_exec_size(devinfo, p->current));
Eric Anholt11dd9e92011-05-24 16:34:27 -07001768 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001769 break;
1770
1771 case BRW_OPCODE_ELSE:
1772 brw_ELSE(p);
1773 break;
1774 case BRW_OPCODE_ENDIF:
1775 brw_ENDIF(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001776 break;
1777
1778 case BRW_OPCODE_DO:
Francisco Jerez8ef56372016-05-18 19:36:03 -07001779 brw_DO(p, brw_inst_exec_size(devinfo, p->current));
Eric Anholt11dd9e92011-05-24 16:34:27 -07001780 break;
1781
1782 case BRW_OPCODE_BREAK:
Eric Anholtf1d89632011-12-06 12:44:41 -08001783 brw_BREAK(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001784 break;
1785 case BRW_OPCODE_CONTINUE:
Kenneth Graunkee64dbd02014-08-04 14:26:26 -07001786 brw_CONT(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001787 break;
1788
Eric Anholtce6be332011-12-06 12:30:03 -08001789 case BRW_OPCODE_WHILE:
Eric Anholtce6be332011-12-06 12:30:03 -08001790 brw_WHILE(p);
Abdiel Janulguef3401452014-08-06 11:27:58 +03001791 loop_count++;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001792 break;
1793
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001794 case SHADER_OPCODE_RCP:
1795 case SHADER_OPCODE_RSQ:
1796 case SHADER_OPCODE_SQRT:
1797 case SHADER_OPCODE_EXP2:
1798 case SHADER_OPCODE_LOG2:
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001799 case SHADER_OPCODE_SIN:
1800 case SHADER_OPCODE_COS:
Matt Turnerf5bef2d2014-11-21 12:34:22 -08001801 assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
Francisco Jerez3a541d02016-05-17 19:10:48 -07001802 if (devinfo->gen >= 6) {
1803 assert(inst->mlen == 0);
1804 assert(devinfo->gen >= 7 || inst->exec_size == 8);
1805 gen6_math(p, dst, brw_math_function(inst->opcode),
1806 src[0], brw_null_reg());
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001807 } else {
Francisco Jerez3a541d02016-05-17 19:10:48 -07001808 assert(inst->mlen >= 1);
1809 assert(devinfo->gen == 5 || devinfo->is_g4x || inst->exec_size == 8);
1810 gen4_math(p, dst,
1811 brw_math_function(inst->opcode),
1812 inst->base_mrf, src[0],
1813 BRW_MATH_PRECISION_FULL);
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001814 }
1815 break;
Kenneth Graunkeff8f2722011-09-28 17:37:54 -07001816 case SHADER_OPCODE_INT_QUOTIENT:
1817 case SHADER_OPCODE_INT_REMAINDER:
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001818 case SHADER_OPCODE_POW:
Matt Turnerf5bef2d2014-11-21 12:34:22 -08001819 assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
Francisco Jerez3a541d02016-05-17 19:10:48 -07001820 if (devinfo->gen >= 6) {
1821 assert(inst->mlen == 0);
1822 assert((devinfo->gen >= 7 && inst->opcode == SHADER_OPCODE_POW) ||
1823 inst->exec_size == 8);
Kenneth Graunke35e48bd2014-06-07 02:27:43 -07001824 gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
Francisco Jerez3a541d02016-05-17 19:10:48 -07001825 } else {
1826 assert(inst->mlen >= 1);
1827 assert(inst->exec_size == 8);
1828 gen4_math(p, dst, brw_math_function(inst->opcode),
1829 inst->base_mrf, src[0],
1830 BRW_MATH_PRECISION_FULL);
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001831 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001832 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001833 case FS_OPCODE_CINTERP:
1834 brw_MOV(p, dst, src[0]);
1835 break;
1836 case FS_OPCODE_LINTERP:
1837 generate_linterp(inst, dst, src);
1838 break;
Matt Turner529064f2015-04-14 13:17:38 -07001839 case FS_OPCODE_PIXEL_X:
1840 assert(src[0].type == BRW_REGISTER_TYPE_UW);
1841 src[0].subnr = 0 * type_sz(src[0].type);
1842 brw_MOV(p, dst, stride(src[0], 8, 4, 1));
1843 break;
1844 case FS_OPCODE_PIXEL_Y:
1845 assert(src[0].type == BRW_REGISTER_TYPE_UW);
1846 src[0].subnr = 4 * type_sz(src[0].type);
1847 brw_MOV(p, dst, stride(src[0], 8, 4, 1));
1848 break;
Samuel Iglesias Gonsalvezb23eb642015-04-13 16:55:49 +02001849 case FS_OPCODE_GET_BUFFER_SIZE:
1850 generate_get_buffer_size(inst, dst, src[0], src[1]);
1851 break;
Kenneth Graunkefebad172011-10-26 12:58:37 -07001852 case SHADER_OPCODE_TEX:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001853 case FS_OPCODE_TXB:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001854 case SHADER_OPCODE_TXD:
1855 case SHADER_OPCODE_TXF:
Matt Turner75dccf52016-05-04 15:46:45 -07001856 case SHADER_OPCODE_TXF_LZ:
Topi Pohjolainence527a62013-12-10 16:36:31 +02001857 case SHADER_OPCODE_TXF_CMS:
Neil Robertse386fb02015-09-08 15:52:09 +01001858 case SHADER_OPCODE_TXF_CMS_W:
Topi Pohjolainen41d397f2013-12-10 16:38:15 +02001859 case SHADER_OPCODE_TXF_UMS:
Chris Forbes7629c482013-11-30 10:32:16 +13001860 case SHADER_OPCODE_TXF_MCS:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001861 case SHADER_OPCODE_TXL:
Matt Turner75dccf52016-05-04 15:46:45 -07001862 case SHADER_OPCODE_TXL_LZ:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001863 case SHADER_OPCODE_TXS:
Matt Turnerb8aa9f72013-03-06 14:47:01 -08001864 case SHADER_OPCODE_LOD:
Chris Forbesfb455502013-03-31 21:31:12 +13001865 case SHADER_OPCODE_TG4:
Chris Forbes6bb2cf22013-10-08 21:42:10 +13001866 case SHADER_OPCODE_TG4_OFFSET:
Ilia Mirkin0b91bce2015-08-11 20:37:32 -04001867 case SHADER_OPCODE_SAMPLEINFO:
Jason Ekstrandb8ab9c82016-02-05 18:24:02 -08001868 generate_tex(inst, dst, src[0], src[1], src[2]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001869 break;
Kenneth Graunkecea37f02014-11-08 01:39:14 -08001870 case FS_OPCODE_DDX_COARSE:
1871 case FS_OPCODE_DDX_FINE:
1872 generate_ddx(inst->opcode, dst, src[0]);
1873 break;
1874 case FS_OPCODE_DDY_COARSE:
1875 case FS_OPCODE_DDY_FINE:
Kenneth Graunkedac10e82016-05-17 01:52:16 -07001876 generate_ddy(inst->opcode, dst, src[0]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001877 break;
1878
Eric Anholt60322612013-10-16 11:45:06 -07001879 case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
1880 generate_scratch_write(inst, src[0]);
Matt Turnerb0d422c2015-03-16 12:18:31 -07001881 spill_count++;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001882 break;
1883
Eric Anholt60322612013-10-16 11:45:06 -07001884 case SHADER_OPCODE_GEN4_SCRATCH_READ:
1885 generate_scratch_read(inst, dst);
Matt Turnerb0d422c2015-03-16 12:18:31 -07001886 fill_count++;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001887 break;
1888
Eric Anholt8dfc9f02013-10-16 11:51:22 -07001889 case SHADER_OPCODE_GEN7_SCRATCH_READ:
1890 generate_scratch_read_gen7(inst, dst);
Matt Turnerb0d422c2015-03-16 12:18:31 -07001891 fill_count++;
Eric Anholt8dfc9f02013-10-16 11:51:22 -07001892 break;
1893
Kenneth Graunked2f089b2015-11-07 18:58:34 -08001894 case SHADER_OPCODE_MOV_INDIRECT:
1895 generate_mov_indirect(inst, dst, src[0], src[1]);
1896 break;
1897
Kenneth Graunkeac988882015-09-29 14:32:02 -07001898 case SHADER_OPCODE_URB_READ_SIMD8:
Kenneth Graunke5480bbd2015-11-07 01:37:33 -08001899 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
Kenneth Graunkeac988882015-09-29 14:32:02 -07001900 generate_urb_read(inst, dst, src[0]);
1901 break;
1902
Kristian Høgsbergd9e29f52014-10-20 23:00:50 -07001903 case SHADER_OPCODE_URB_WRITE_SIMD8:
Kenneth Graunkebea75222015-05-06 00:04:10 -07001904 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
1905 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
1906 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
Kristian Høgsbergd9e29f52014-10-20 23:00:50 -07001907 generate_urb_write(inst, src[0]);
1908 break;
1909
Eric Anholt29340d02012-11-07 10:42:34 -08001910 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
Francisco Jerez01503502016-05-23 14:07:23 -07001911 assert(inst->force_writemask_all);
Eric Anholt29340d02012-11-07 10:42:34 -08001912 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001913 break;
1914
Eric Anholt461a2972012-12-05 00:06:30 -08001915 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
Francisco Jerez01503502016-05-23 14:07:23 -07001916 assert(inst->force_writemask_all);
Eric Anholt461a2972012-12-05 00:06:30 -08001917 generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1918 break;
1919
Francisco Jerezed4d0e42016-05-20 13:03:31 -07001920 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
1921 generate_varying_pull_constant_load_gen4(inst, dst, src[0]);
Eric Anholtd8214e42012-11-07 11:18:34 -08001922 break;
1923
1924 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
1925 generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1926 break;
1927
Kristian Høgsbergf9dc7aa2014-07-07 15:27:17 -07001928 case FS_OPCODE_REP_FB_WRITE:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001929 case FS_OPCODE_FB_WRITE:
Jason Ekstrand8b0e4b32014-09-16 15:16:20 -07001930 generate_fb_write(inst, src[0]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001931 break;
Paul Berry3f929ef2012-06-18 14:50:04 -07001932
Francisco Jerezf2f75b02016-07-21 16:52:33 -07001933 case FS_OPCODE_FB_READ:
1934 generate_fb_read(inst, dst, src[0]);
1935 break;
1936
Paul Berry3f929ef2012-06-18 14:50:04 -07001937 case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
Eric Anholtb278f652012-12-06 10:36:11 -08001938 generate_mov_dispatch_to_flags(inst);
Paul Berry3f929ef2012-06-18 14:50:04 -07001939 break;
1940
Eric Anholtbeafced2012-12-06 10:15:08 -08001941 case FS_OPCODE_DISCARD_JUMP:
1942 generate_discard_jump(inst);
1943 break;
1944
Eric Anholt71f06342012-11-27 14:10:52 -08001945 case SHADER_OPCODE_SHADER_TIME_ADD:
Eric Anholt5c5218e2013-03-19 15:28:11 -07001946 generate_shader_time_add(inst, src[0], src[1], src[2]);
Eric Anholt71f06342012-11-27 14:10:52 -08001947 break;
1948
Francisco Jerezcfaaa9b2013-09-11 14:01:50 -07001949 case SHADER_OPCODE_UNTYPED_ATOMIC:
Francisco Jerez3af26232015-07-20 17:38:15 +03001950 assert(src[2].file == BRW_IMMEDIATE_VALUE);
Matt Turnere42fb0c2015-10-22 19:41:30 -07001951 brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
Francisco Jerez8865fe32015-02-26 17:41:46 +02001952 inst->mlen, !inst->dst.is_null());
Francisco Jerezcfaaa9b2013-09-11 14:01:50 -07001953 break;
1954
Francisco Jerez5e621cb2013-09-11 14:03:13 -07001955 case SHADER_OPCODE_UNTYPED_SURFACE_READ:
Francisco Jerez3af26232015-07-20 17:38:15 +03001956 assert(src[2].file == BRW_IMMEDIATE_VALUE);
Francisco Jerez0519a622015-04-22 21:10:43 +03001957 brw_untyped_surface_read(p, dst, src[0], src[1],
Matt Turnere42fb0c2015-10-22 19:41:30 -07001958 inst->mlen, src[2].ud);
Francisco Jerez5e621cb2013-09-11 14:03:13 -07001959 break;
1960
Francisco Jerez0775d882015-04-23 14:24:14 +03001961 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
1962 assert(src[2].file == BRW_IMMEDIATE_VALUE);
1963 brw_untyped_surface_write(p, src[0], src[1],
Matt Turnere42fb0c2015-10-22 19:41:30 -07001964 inst->mlen, src[2].ud);
Francisco Jerez0775d882015-04-23 14:24:14 +03001965 break;
1966
Francisco Jerezf118e5d2015-04-23 14:28:25 +03001967 case SHADER_OPCODE_TYPED_ATOMIC:
1968 assert(src[2].file == BRW_IMMEDIATE_VALUE);
1969 brw_typed_atomic(p, dst, src[0], src[1],
Matt Turnere42fb0c2015-10-22 19:41:30 -07001970 src[2].ud, inst->mlen, !inst->dst.is_null());
Francisco Jerezf118e5d2015-04-23 14:28:25 +03001971 break;
1972
1973 case SHADER_OPCODE_TYPED_SURFACE_READ:
1974 assert(src[2].file == BRW_IMMEDIATE_VALUE);
1975 brw_typed_surface_read(p, dst, src[0], src[1],
Matt Turnere42fb0c2015-10-22 19:41:30 -07001976 inst->mlen, src[2].ud);
Francisco Jerezf118e5d2015-04-23 14:28:25 +03001977 break;
1978
1979 case SHADER_OPCODE_TYPED_SURFACE_WRITE:
1980 assert(src[2].file == BRW_IMMEDIATE_VALUE);
Matt Turnere42fb0c2015-10-22 19:41:30 -07001981 brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud);
Francisco Jerezf118e5d2015-04-23 14:28:25 +03001982 break;
1983
Francisco Jerezf1d1d172015-04-23 14:30:28 +03001984 case SHADER_OPCODE_MEMORY_FENCE:
1985 brw_memory_fence(p, dst);
1986 break;
1987
Jason Ekstrand8a468d12016-09-14 15:09:33 -07001988 case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
1989 const struct brw_reg mask =
Francisco Jerezc05a4f12016-09-15 17:24:10 -07001990 brw_stage_has_packed_dispatch(devinfo, stage,
1991 prog_data) ? brw_imm_ud(~0u) :
1992 stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
1993 brw_dmask_reg();
Jason Ekstrand8a468d12016-09-14 15:09:33 -07001994 brw_find_live_channel(p, dst, mask);
Francisco Jerez715bc6d2015-04-23 14:42:53 +03001995 break;
Jason Ekstrand8a468d12016-09-14 15:09:33 -07001996 }
Francisco Jerez715bc6d2015-04-23 14:42:53 +03001997
Francisco Jerezc74511f2015-02-20 20:14:24 +02001998 case SHADER_OPCODE_BROADCAST:
Francisco Jerez81bc6de2016-05-19 00:10:03 -07001999 assert(inst->force_writemask_all);
Francisco Jerezc74511f2015-02-20 20:14:24 +02002000 brw_broadcast(p, dst, src[0], src[1]);
2001 break;
2002
Anuj Phogate12bbb52013-10-24 16:17:08 -07002003 case FS_OPCODE_SET_SAMPLE_ID:
2004 generate_set_sample_id(inst, dst, src[0], src[1]);
2005 break;
2006
Chad Versace20dfa502013-01-09 11:46:42 -08002007 case FS_OPCODE_PACK_HALF_2x16_SPLIT:
2008 generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
2009 break;
2010
2011 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
2012 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
2013 generate_unpack_half_2x16_split(inst, dst, src[0]);
2014 break;
2015
Kenneth Graunke57a50252013-03-27 23:19:39 -07002016 case FS_OPCODE_PLACEHOLDER_HALT:
2017 /* This is the place where the final HALT needs to be inserted if
2018 * we've emitted any discards. If not, this will emit no code.
2019 */
Matt Turnerf0f7fb12014-05-19 10:20:37 -07002020 if (!patch_discard_jumps_to_fb_writes()) {
Matt Turnerc9fd6842014-05-25 10:30:13 -07002021 if (unlikely(debug_flag)) {
Matt Turner92b05562014-05-25 10:42:32 -07002022 annotation.ann_count--;
Matt Turnerc9fd6842014-05-25 10:30:13 -07002023 }
Matt Turnerf0f7fb12014-05-19 10:20:37 -07002024 }
Kenneth Graunke57a50252013-03-27 23:19:39 -07002025 break;
2026
Chris Forbes6e91f2d2013-11-18 21:13:13 +13002027 case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
2028 generate_pixel_interpolator_query(inst, dst, src[0], src[1],
2029 GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
2030 break;
2031
2032 case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
2033 generate_pixel_interpolator_query(inst, dst, src[0], src[1],
2034 GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET);
2035 break;
2036
2037 case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
2038 generate_pixel_interpolator_query(inst, dst, src[0], src[1],
2039 GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
2040 break;
2041
Jordan Justen2a4df9c2014-08-27 11:33:25 -07002042 case CS_OPCODE_CS_TERMINATE:
2043 generate_cs_terminate(inst, src[0]);
2044 break;
2045
Jordan Justenf7ef8ec2014-08-27 11:32:08 -07002046 case SHADER_OPCODE_BARRIER:
2047 generate_barrier(inst, src[0]);
2048 break;
2049
Samuel Iglesias Gonsálvez6e289762016-07-07 08:38:22 +02002050 case BRW_OPCODE_DIM:
2051 assert(devinfo->is_haswell);
2052 assert(src[0].type == BRW_REGISTER_TYPE_DF);
2053 assert(dst.type == BRW_REGISTER_TYPE_DF);
2054 brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
2055 break;
2056
Eric Anholt11dd9e92011-05-24 16:34:27 -07002057 default:
Jason Ekstrand5cb91db2015-04-15 14:51:18 -07002058 unreachable("Unsupported opcode");
Matt Turnerb9962162014-05-27 18:47:40 -07002059
2060 case SHADER_OPCODE_LOAD_PAYLOAD:
Matt Turner3d826722014-06-29 14:54:01 -07002061 unreachable("Should be lowered by lower_load_payload()");
Eric Anholt11dd9e92011-05-24 16:34:27 -07002062 }
Kenneth Graunke776ad512014-05-30 16:41:32 -07002063
Matt Turner7452f182014-12-30 12:56:13 -08002064 if (multiple_instructions_emitted)
2065 continue;
2066
Matt Turner6d253632014-06-28 23:31:04 -07002067 if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
2068 assert(p->next_insn_offset == last_insn_offset + 16 ||
2069 !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
2070 "emitting more than 1 instruction");
2071
Matt Turner7c796082014-06-13 14:29:25 -07002072 brw_inst *last = &p->store[last_insn_offset / 16];
Matt Turner6d253632014-06-28 23:31:04 -07002073
Matt Turner56ac2592014-11-21 12:20:53 -08002074 if (inst->conditional_mod)
Jason Ekstrand4e9c79c2015-04-14 18:00:06 -07002075 brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
2076 brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
2077 brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
Kenneth Graunke776ad512014-05-30 16:41:32 -07002078 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07002079 }
2080
Matt Turner90eaf012016-08-29 15:57:41 -07002081 brw_set_uip_jip(p, start_offset);
Matt Turner92b05562014-05-25 10:42:32 -07002082 annotation_finalize(&annotation, p->next_insn_offset);
2083
Matt Turner0b45d472015-06-29 14:08:51 -07002084#ifndef NDEBUG
2085 bool validated = brw_validate_instructions(p, start_offset, &annotation);
2086#else
2087 if (unlikely(debug_flag))
2088 brw_validate_instructions(p, start_offset, &annotation);
2089#endif
2090
Matt Turner757d7dd2014-05-25 14:56:41 -07002091 int before_size = p->next_insn_offset - start_offset;
Matt Turner92b05562014-05-25 10:42:32 -07002092 brw_compact_instructions(p, start_offset, annotation.ann_count,
2093 annotation.ann);
Matt Turner757d7dd2014-05-25 14:56:41 -07002094 int after_size = p->next_insn_offset - start_offset;
Matt Turner92b05562014-05-25 10:42:32 -07002095
2096 if (unlikely(debug_flag)) {
Kristian Høgsberg9a1af7b2014-10-27 19:40:47 -07002097 fprintf(stderr, "Native code for %s\n"
Connor Abbott45cd76e2015-06-06 10:55:21 -04002098 "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
Kristian Høgsberg9a1af7b2014-10-27 19:40:47 -07002099 " bytes (%.0f%%)\n",
Connor Abbott45cd76e2015-06-06 10:55:21 -04002100 shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count,
Matt Turnerb0d422c2015-03-16 12:18:31 -07002101 spill_count, fill_count, promoted_constants, before_size, after_size,
Matt Turner757d7dd2014-05-25 14:56:41 -07002102 100.0f * (before_size - after_size) / before_size);
Matt Turner6fdfe3f2014-05-25 10:46:55 -07002103
Jason Ekstrand9c89e472015-04-15 15:01:25 -07002104 dump_assembly(p->store, annotation.ann_count, annotation.ann,
Jason Ekstrande00314b2015-10-05 15:49:34 -07002105 p->devinfo);
Matt Turner93e371c2015-06-29 14:05:27 -07002106 ralloc_free(annotation.mem_ctx);
Matt Turner92b05562014-05-25 10:42:32 -07002107 }
Matt Turner0b45d472015-06-29 14:08:51 -07002108 assert(validated);
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08002109
Jason Ekstrande639a6f2015-04-16 14:13:52 -07002110 compiler->shader_debug_log(log_data,
Connor Abbott45cd76e2015-06-06 10:55:21 -04002111 "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
Jason Ekstrande639a6f2015-04-16 14:13:52 -07002112 "%d:%d spills:fills, Promoted %u constants, "
Matt Turner138a7dc2016-01-13 16:17:26 -08002113 "compacted %d to %d bytes.",
Jason Ekstrand9870f792016-01-14 20:27:51 -08002114 _mesa_shader_stage_to_abbrev(stage),
2115 dispatch_width, before_size / 16,
Connor Abbott45cd76e2015-06-06 10:55:21 -04002116 loop_count, cfg->cycle_count, spill_count,
2117 fill_count, promoted_constants, before_size,
2118 after_size);
Matt Turner9ed8d002014-11-14 12:46:44 -08002119
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08002120 return start_offset;
Eric Anholt11dd9e92011-05-24 16:34:27 -07002121}
Kenneth Graunkeea681a02012-11-09 01:05:47 -08002122
2123const unsigned *
Kristian Høgsbergf2bb6552014-11-13 16:28:08 -08002124fs_generator::get_assembly(unsigned int *assembly_size)
Kenneth Graunkeea681a02012-11-09 01:05:47 -08002125{
Kenneth Graunkeea681a02012-11-09 01:05:47 -08002126 return brw_get_program(p, assembly_size);
2127}