Blame - src/mesa/drivers/dri/i965/brw_fs_emit.cpp - platform/external/mesa3d

blob: 6b7c434949c01029f8ccbd7e8d9c43983c85be87 [file] [log] [blame]

Eric Anholt	11dd9e9	2011-05-24 16:34:27 -0700	[diff] [blame]	1	/*
				2	* Copyright © 2010 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*/
				23
				24	/** @file brw_fs_emit.cpp
				25	*
				26	* This file supports emitting code from the FS LIR to the actual
				27	* native instructions.
				28	*/
				29
				30	extern "C" {
				31	#include "main/macros.h"
				32	#include "brw_context.h"
				33	#include "brw_eu.h"
				34	} /* extern "C" */
				35
				36	#include "brw_fs.h"
				37	#include "../glsl/ir_print_visitor.h"
				38
				39	void
				40	fs_visitor::generate_fb_write(fs_inst *inst)
				41	{
				42	GLboolean eot = inst->eot;
				43	struct brw_reg implied_header;
				44
				45	/* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
				46	* move, here's g1.
				47	*/
				48	brw_push_insn_state(p);
				49	brw_set_mask_control(p, BRW_MASK_DISABLE);
				50	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				51
				52	if (inst->header_present) {
				53	if (intel->gen >= 6) {
				54	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
				55	brw_MOV(p,
				56	retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
				57	retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
				58	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				59
				60	if (inst->target > 0) {
				61	/* Set the render target index for choosing BLEND_STATE. */
				62	brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
				63	BRW_REGISTER_TYPE_UD),
				64	brw_imm_ud(inst->target));
				65	}
				66
				67	implied_header = brw_null_reg();
				68	} else {
				69	implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
				70
				71	brw_MOV(p,
				72	brw_message_reg(inst->base_mrf + 1),
				73	brw_vec8_grf(1, 0));
				74	}
				75	} else {
				76	implied_header = brw_null_reg();
				77	}
				78
				79	brw_pop_insn_state(p);
				80
				81	brw_fb_WRITE(p,
				82	c->dispatch_width,
				83	inst->base_mrf,
				84	implied_header,
				85	inst->target,
				86	inst->mlen,
				87	0,
				88	eot,
				89	inst->header_present);
				90	}
				91
				92	/* Computes the integer pixel x,y values from the origin.
				93	*
				94	* This is the basis of gl_FragCoord computation, but is also used
				95	* pre-gen6 for computing the deltas from v0 for computing
				96	* interpolation.
				97	*/
				98	void
				99	fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
				100	{
				101	struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
				102	struct brw_reg src;
				103	struct brw_reg deltas;
				104
				105	if (is_x) {
				106	src = stride(suboffset(g1_uw, 4), 2, 4, 0);
				107	deltas = brw_imm_v(0x10101010);
				108	} else {
				109	src = stride(suboffset(g1_uw, 5), 2, 4, 0);
				110	deltas = brw_imm_v(0x11001100);
				111	}
				112
				113	if (c->dispatch_width == 16) {
				114	dst = vec16(dst);
				115	}
				116
				117	/* We do this 8 or 16-wide, but since the destination is UW we
				118	* don't do compression in the 16-wide case.
				119	*/
				120	brw_push_insn_state(p);
				121	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				122	brw_ADD(p, dst, src, deltas);
				123	brw_pop_insn_state(p);
				124	}
				125
				126	void
				127	fs_visitor::generate_linterp(fs_inst *inst,
				128	struct brw_reg dst, struct brw_reg *src)
				129	{
				130	struct brw_reg delta_x = src[0];
				131	struct brw_reg delta_y = src[1];
				132	struct brw_reg interp = src[2];
				133
				134	if (brw->has_pln &&
				135	delta_y.nr == delta_x.nr + 1 &&
				136	(intel->gen >= 6 \|\| (delta_x.nr & 1) == 0)) {
				137	brw_PLN(p, dst, interp, delta_x);
				138	} else {
				139	brw_LINE(p, brw_null_reg(), interp, delta_x);
				140	brw_MAC(p, dst, suboffset(interp, 1), delta_y);
				141	}
				142	}
				143
				144	void
				145	fs_visitor::generate_math(fs_inst *inst,
				146	struct brw_reg dst, struct brw_reg *src)
				147	{
				148	int op;
				149
				150	switch (inst->opcode) {
				151	case FS_OPCODE_RCP:
				152	op = BRW_MATH_FUNCTION_INV;
				153	break;
				154	case FS_OPCODE_RSQ:
				155	op = BRW_MATH_FUNCTION_RSQ;
				156	break;
				157	case FS_OPCODE_SQRT:
				158	op = BRW_MATH_FUNCTION_SQRT;
				159	break;
				160	case FS_OPCODE_EXP2:
				161	op = BRW_MATH_FUNCTION_EXP;
				162	break;
				163	case FS_OPCODE_LOG2:
				164	op = BRW_MATH_FUNCTION_LOG;
				165	break;
				166	case FS_OPCODE_POW:
				167	op = BRW_MATH_FUNCTION_POW;
				168	break;
				169	case FS_OPCODE_SIN:
				170	op = BRW_MATH_FUNCTION_SIN;
				171	break;
				172	case FS_OPCODE_COS:
				173	op = BRW_MATH_FUNCTION_COS;
				174	break;
				175	default:
				176	assert(!"not reached: unknown math function");
				177	op = 0;
				178	break;
				179	}
				180
				181	if (intel->gen >= 6) {
				182	assert(inst->mlen == 0);
				183
				184	if (inst->opcode == FS_OPCODE_POW) {
				185	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				186	brw_math2(p, dst, op, src[0], src[1]);
				187
				188	if (c->dispatch_width == 16) {
				189	brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				190	brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
				191	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
				192	}
				193	} else {
				194	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				195	brw_math(p, dst,
				196	op,
				197	inst->saturate ? BRW_MATH_SATURATE_SATURATE :
				198	BRW_MATH_SATURATE_NONE,
				199	0, src[0],
				200	BRW_MATH_DATA_VECTOR,
				201	BRW_MATH_PRECISION_FULL);
				202
				203	if (c->dispatch_width == 16) {
				204	brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				205	brw_math(p, sechalf(dst),
				206	op,
				207	inst->saturate ? BRW_MATH_SATURATE_SATURATE :
				208	BRW_MATH_SATURATE_NONE,
				209	0, sechalf(src[0]),
				210	BRW_MATH_DATA_VECTOR,
				211	BRW_MATH_PRECISION_FULL);
				212	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
				213	}
				214	}
				215	} else /* gen <= 5 */{
				216	assert(inst->mlen >= 1);
				217
				218	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				219	brw_math(p, dst,
				220	op,
				221	inst->saturate ? BRW_MATH_SATURATE_SATURATE :
				222	BRW_MATH_SATURATE_NONE,
				223	inst->base_mrf, src[0],
				224	BRW_MATH_DATA_VECTOR,
				225	BRW_MATH_PRECISION_FULL);
				226
				227	if (c->dispatch_width == 16) {
				228	brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				229	brw_math(p, sechalf(dst),
				230	op,
				231	inst->saturate ? BRW_MATH_SATURATE_SATURATE :
				232	BRW_MATH_SATURATE_NONE,
				233	inst->base_mrf + 1, sechalf(src[0]),
				234	BRW_MATH_DATA_VECTOR,
				235	BRW_MATH_PRECISION_FULL);
				236
				237	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
				238	}
				239	}
				240	}
				241
				242	void
				243	fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
				244	{
				245	int msg_type = -1;
				246	int rlen = 4;
				247	uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
				248
				249	if (c->dispatch_width == 16)
				250	simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
				251
				252	if (intel->gen >= 5) {
				253	switch (inst->opcode) {
				254	case FS_OPCODE_TEX:
				255	if (inst->shadow_compare) {
				256	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
				257	} else {
				258	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
				259	}
				260	break;
				261	case FS_OPCODE_TXB:
				262	if (inst->shadow_compare) {
				263	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
				264	} else {
				265	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
				266	}
				267	break;
				268	case FS_OPCODE_TXL:
				269	if (inst->shadow_compare) {
				270	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
				271	} else {
				272	msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
				273	}
				274	break;
				275	case FS_OPCODE_TXD:
				276	assert(!"TXD isn't supported on gen5+ yet.");
				277	break;
				278	}
				279	} else {
				280	switch (inst->opcode) {
				281	case FS_OPCODE_TEX:
				282	/* Note that G45 and older determines shadow compare and dispatch width
				283	* from message length for most messages.
				284	*/
				285	assert(c->dispatch_width == 8);
				286	msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
				287	if (inst->shadow_compare) {
				288	assert(inst->mlen == 6);
				289	} else {
				290	assert(inst->mlen <= 4);
				291	}
				292	break;
				293	case FS_OPCODE_TXB:
				294	if (inst->shadow_compare) {
				295	assert(inst->mlen == 6);
				296	msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
				297	} else {
				298	assert(inst->mlen == 9);
				299	msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
				300	simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
				301	}
				302	break;
				303	case FS_OPCODE_TXL:
				304	if (inst->shadow_compare) {
				305	assert(inst->mlen == 6);
				306	msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
				307	} else {
				308	assert(inst->mlen == 9);
				309	msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
				310	simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
				311	}
				312	break;
				313	case FS_OPCODE_TXD:
				314	assert(!"TXD isn't supported on gen4 yet.");
				315	break;
				316	}
				317	}
				318	assert(msg_type != -1);
				319
				320	if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
				321	rlen = 8;
				322	dst = vec16(dst);
				323	}
				324
				325	brw_SAMPLE(p,
				326	retype(dst, BRW_REGISTER_TYPE_UW),
				327	inst->base_mrf,
				328	src,
				329	SURF_INDEX_TEXTURE(inst->sampler),
				330	inst->sampler,
				331	WRITEMASK_XYZW,
				332	msg_type,
				333	rlen,
				334	inst->mlen,
				335	0,
				336	inst->header_present,
				337	simd_mode);
				338	}
				339
				340
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *             DDX                   DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz = 0 gets us the same
 * result for each pair, and vertstride = 2 jumps us 2 elements after
 * processing a pair.  But for DDY, it's harder, as we want to produce the
 * pairs swizzled between each other.  We could probably do it like DDX and
 * swizzle the right order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4).
 */
				367	void
				368	fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
				369	{
				370	struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				371	BRW_REGISTER_TYPE_F,
				372	BRW_VERTICAL_STRIDE_2,
				373	BRW_WIDTH_2,
				374	BRW_HORIZONTAL_STRIDE_0,
				375	BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
				376	struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				377	BRW_REGISTER_TYPE_F,
				378	BRW_VERTICAL_STRIDE_2,
				379	BRW_WIDTH_2,
				380	BRW_HORIZONTAL_STRIDE_0,
				381	BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
				382	brw_ADD(p, dst, src0, negate(src1));
				383	}
				384
				385	void
				386	fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
				387	{
				388	struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				389	BRW_REGISTER_TYPE_F,
				390	BRW_VERTICAL_STRIDE_4,
				391	BRW_WIDTH_4,
				392	BRW_HORIZONTAL_STRIDE_0,
				393	BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
				394	struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				395	BRW_REGISTER_TYPE_F,
				396	BRW_VERTICAL_STRIDE_4,
				397	BRW_WIDTH_4,
				398	BRW_HORIZONTAL_STRIDE_0,
				399	BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
				400	brw_ADD(p, dst, src0, negate(src1));
				401	}
				402
				403	void
				404	fs_visitor::generate_discard(fs_inst *inst)
				405	{
				406	struct brw_reg f0 = brw_flag_reg();
				407
				408	if (intel->gen >= 6) {
				409	struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
				410	struct brw_reg some_register;
				411
				412	/* As of gen6, we no longer have the mask register to look at,
				413	* so life gets a bit more complicated.
				414	*/
				415
				416	/* Load the flag register with all ones. */
				417	brw_push_insn_state(p);
				418	brw_set_mask_control(p, BRW_MASK_DISABLE);
				419	brw_MOV(p, f0, brw_imm_uw(0xffff));
				420	brw_pop_insn_state(p);
				421
				422	/* Do a comparison that should always fail, to produce 0s in the flag
				423	* reg where we have active channels.
				424	*/
				425	some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
				426	brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
				427	BRW_CONDITIONAL_NZ, some_register, some_register);
				428
				429	/* Undo CMP's whacking of predication*/
				430	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
				431
				432	brw_push_insn_state(p);
				433	brw_set_mask_control(p, BRW_MASK_DISABLE);
				434	brw_AND(p, g1, f0, g1);
				435	brw_pop_insn_state(p);
				436	} else {
				437	struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
				438
				439	brw_push_insn_state(p);
				440	brw_set_mask_control(p, BRW_MASK_DISABLE);
				441	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				442
				443	/* Unlike the 965, we have the mask reg, so we just need
				444	* somewhere to invert that (containing channels to be disabled)
				445	* so it can be ANDed with the mask of pixels still to be
				446	* written. Use the flag reg for consistency with gen6+.
				447	*/
				448	brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
				449	brw_AND(p, g0, f0, g0);
				450
				451	brw_pop_insn_state(p);
				452	}
				453	}
				454
				455	void
				456	fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
				457	{
				458	assert(inst->mlen != 0);
				459
				460	brw_MOV(p,
				461	retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
				462	retype(src, BRW_REGISTER_TYPE_UD));
				463	brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
				464	inst->offset);
				465	}
				466
				467	void
				468	fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
				469	{
				470	assert(inst->mlen != 0);
				471
				472	/* Clear any post destination dependencies that would be ignored by
				473	* the block read. See the B-Spec for pre-gen5 send instruction.
				474	*
				475	* This could use a better solution, since texture sampling and
				476	* math reads could potentially run into it as well -- anywhere
				477	* that we have a SEND with a destination that is a register that
				478	* was written but not read within the last N instructions (what's
				479	* N? unsure). This is rare because of dead code elimination, but
				480	* not impossible.
				481	*/
				482	if (intel->gen == 4 && !intel->is_g4x)
				483	brw_MOV(p, brw_null_reg(), dst);
				484
				485	brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
				486	inst->offset);
				487
				488	if (intel->gen == 4 && !intel->is_g4x) {
				489	/* gen4 errata: destination from a send can't be used as a
				490	* destination until it's been read. Just read it so we don't
				491	* have to worry.
				492	*/
				493	brw_MOV(p, brw_null_reg(), dst);
				494	}
				495	}
				496
				497	void
				498	fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
				499	{
				500	assert(inst->mlen != 0);
				501
				502	/* Clear any post destination dependencies that would be ignored by
				503	* the block read. See the B-Spec for pre-gen5 send instruction.
				504	*
				505	* This could use a better solution, since texture sampling and
				506	* math reads could potentially run into it as well -- anywhere
				507	* that we have a SEND with a destination that is a register that
				508	* was written but not read within the last N instructions (what's
				509	* N? unsure). This is rare because of dead code elimination, but
				510	* not impossible.
				511	*/
				512	if (intel->gen == 4 && !intel->is_g4x)
				513	brw_MOV(p, brw_null_reg(), dst);
				514
				515	brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
				516	inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
				517
				518	if (intel->gen == 4 && !intel->is_g4x) {
				519	/* gen4 errata: destination from a send can't be used as a
				520	* destination until it's been read. Just read it so we don't
				521	* have to worry.
				522	*/
				523	brw_MOV(p, brw_null_reg(), dst);
				524	}
				525	}
				526
				527	static struct brw_reg
				528	brw_reg_from_fs_reg(fs_reg *reg)
				529	{
				530	struct brw_reg brw_reg;
				531
				532	switch (reg->file) {
				533	case GRF:
				534	case ARF:
				535	case MRF:
				536	if (reg->smear == -1) {
				537	brw_reg = brw_vec8_reg(reg->file,
				538	reg->hw_reg, 0);
				539	} else {
				540	brw_reg = brw_vec1_reg(reg->file,
				541	reg->hw_reg, reg->smear);
				542	}
				543	brw_reg = retype(brw_reg, reg->type);
				544	if (reg->sechalf)
				545	brw_reg = sechalf(brw_reg);
				546	break;
				547	case IMM:
				548	switch (reg->type) {
				549	case BRW_REGISTER_TYPE_F:
				550	brw_reg = brw_imm_f(reg->imm.f);
				551	break;
				552	case BRW_REGISTER_TYPE_D:
				553	brw_reg = brw_imm_d(reg->imm.i);
				554	break;
				555	case BRW_REGISTER_TYPE_UD:
				556	brw_reg = brw_imm_ud(reg->imm.u);
				557	break;
				558	default:
				559	assert(!"not reached");
				560	brw_reg = brw_null_reg();
				561	break;
				562	}
				563	break;
				564	case FIXED_HW_REG:
				565	brw_reg = reg->fixed_hw_reg;
				566	break;
				567	case BAD_FILE:
				568	/* Probably unused. */
				569	brw_reg = brw_null_reg();
				570	break;
				571	case UNIFORM:
				572	assert(!"not reached");
				573	brw_reg = brw_null_reg();
				574	break;
				575	default:
				576	assert(!"not reached");
				577	brw_reg = brw_null_reg();
				578	break;
				579	}
				580	if (reg->abs)
				581	brw_reg = brw_abs(brw_reg);
				582	if (reg->negate)
				583	brw_reg = negate(brw_reg);
				584
				585	return brw_reg;
				586	}
				587
				588	void
				589	fs_visitor::generate_code()
				590	{
				591	int last_native_inst = p->nr_insn;
				592	const char *last_annotation_string = NULL;
				593	ir_instruction *last_annotation_ir = NULL;
				594
				595	int loop_stack_array_size = 16;
				596	int loop_stack_depth = 0;
				597	brw_instruction **loop_stack =
				598	rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
				599	int *if_depth_in_loop =
				600	rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
				601
				602
				603	if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
				604	printf("Native code for fragment shader %d (%d-wide dispatch):\n",
Eric Anholt	0653c45	2011-05-27 09:50:34 -0700	[diff] [blame^]	605	prog->Name, c->dispatch_width);
Eric Anholt	11dd9e9	2011-05-24 16:34:27 -0700	[diff] [blame]	606	}
				607
				608	foreach_iter(exec_list_iterator, iter, this->instructions) {
				609	fs_inst inst = (fs_inst )iter.get();
				610	struct brw_reg src[3], dst;
				611
				612	if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
				613	if (last_annotation_ir != inst->ir) {
				614	last_annotation_ir = inst->ir;
				615	if (last_annotation_ir) {
				616	printf(" ");
				617	last_annotation_ir->print();
				618	printf("\n");
				619	}
				620	}
				621	if (last_annotation_string != inst->annotation) {
				622	last_annotation_string = inst->annotation;
				623	if (last_annotation_string)
				624	printf(" %s\n", last_annotation_string);
				625	}
				626	}
				627
				628	for (unsigned int i = 0; i < 3; i++) {
				629	src[i] = brw_reg_from_fs_reg(&inst->src[i]);
				630	}
				631	dst = brw_reg_from_fs_reg(&inst->dst);
				632
				633	brw_set_conditionalmod(p, inst->conditional_mod);
				634	brw_set_predicate_control(p, inst->predicated);
				635	brw_set_predicate_inverse(p, inst->predicate_inverse);
				636	brw_set_saturate(p, inst->saturate);
				637
				638	if (inst->force_uncompressed \|\| c->dispatch_width == 8) {
				639	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
				640	} else if (inst->force_sechalf) {
				641	brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				642	} else {
				643	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
				644	}
				645
				646	switch (inst->opcode) {
				647	case BRW_OPCODE_MOV:
				648	brw_MOV(p, dst, src[0]);
				649	break;
				650	case BRW_OPCODE_ADD:
				651	brw_ADD(p, dst, src[0], src[1]);
				652	break;
				653	case BRW_OPCODE_MUL:
				654	brw_MUL(p, dst, src[0], src[1]);
				655	break;
				656
				657	case BRW_OPCODE_FRC:
				658	brw_FRC(p, dst, src[0]);
				659	break;
				660	case BRW_OPCODE_RNDD:
				661	brw_RNDD(p, dst, src[0]);
				662	break;
				663	case BRW_OPCODE_RNDE:
				664	brw_RNDE(p, dst, src[0]);
				665	break;
				666	case BRW_OPCODE_RNDZ:
				667	brw_RNDZ(p, dst, src[0]);
				668	break;
				669
				670	case BRW_OPCODE_AND:
				671	brw_AND(p, dst, src[0], src[1]);
				672	break;
				673	case BRW_OPCODE_OR:
				674	brw_OR(p, dst, src[0], src[1]);
				675	break;
				676	case BRW_OPCODE_XOR:
				677	brw_XOR(p, dst, src[0], src[1]);
				678	break;
				679	case BRW_OPCODE_NOT:
				680	brw_NOT(p, dst, src[0]);
				681	break;
				682	case BRW_OPCODE_ASR:
				683	brw_ASR(p, dst, src[0], src[1]);
				684	break;
				685	case BRW_OPCODE_SHR:
				686	brw_SHR(p, dst, src[0], src[1]);
				687	break;
				688	case BRW_OPCODE_SHL:
				689	brw_SHL(p, dst, src[0], src[1]);
				690	break;
				691
				692	case BRW_OPCODE_CMP:
				693	brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
				694	break;
				695	case BRW_OPCODE_SEL:
				696	brw_SEL(p, dst, src[0], src[1]);
				697	break;
				698
				699	case BRW_OPCODE_IF:
				700	if (inst->src[0].file != BAD_FILE) {
				701	/* The instruction has an embedded compare (only allowed on gen6) */
				702	assert(intel->gen == 6);
				703	gen6_IF(p, inst->conditional_mod, src[0], src[1]);
				704	} else {
				705	brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
				706	}
				707	if_depth_in_loop[loop_stack_depth]++;
				708	break;
				709
				710	case BRW_OPCODE_ELSE:
				711	brw_ELSE(p);
				712	break;
				713	case BRW_OPCODE_ENDIF:
				714	brw_ENDIF(p);
				715	if_depth_in_loop[loop_stack_depth]--;
				716	break;
				717
				718	case BRW_OPCODE_DO:
				719	loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
				720	if (loop_stack_array_size <= loop_stack_depth) {
				721	loop_stack_array_size *= 2;
				722	loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
				723	loop_stack_array_size);
				724	if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
				725	loop_stack_array_size);
				726	}
				727	if_depth_in_loop[loop_stack_depth] = 0;
				728	break;
				729
				730	case BRW_OPCODE_BREAK:
				731	brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
				732	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
				733	break;
				734	case BRW_OPCODE_CONTINUE:
				735	/* FINISHME: We need to write the loop instruction support still. */
				736	if (intel->gen >= 6)
				737	gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
				738	else
				739	brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
				740	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
				741	break;
				742
				743	case BRW_OPCODE_WHILE: {
				744	struct brw_instruction inst0, inst1;
				745	GLuint br = 1;
				746
				747	if (intel->gen >= 5)
				748	br = 2;
				749
				750	assert(loop_stack_depth > 0);
				751	loop_stack_depth--;
				752	inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
				753	if (intel->gen < 6) {
				754	/* patch all the BREAK/CONT instructions from last BGNLOOP */
				755	while (inst0 > loop_stack[loop_stack_depth]) {
				756	inst0--;
				757	if (inst0->header.opcode == BRW_OPCODE_BREAK &&
				758	inst0->bits3.if_else.jump_count == 0) {
				759	inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
				760	}
				761	else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
				762	inst0->bits3.if_else.jump_count == 0) {
				763	inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
				764	}
				765	}
				766	}
				767	}
				768	break;
				769
				770	case FS_OPCODE_RCP:
				771	case FS_OPCODE_RSQ:
				772	case FS_OPCODE_SQRT:
				773	case FS_OPCODE_EXP2:
				774	case FS_OPCODE_LOG2:
				775	case FS_OPCODE_POW:
				776	case FS_OPCODE_SIN:
				777	case FS_OPCODE_COS:
				778	generate_math(inst, dst, src);
				779	break;
				780	case FS_OPCODE_PIXEL_X:
				781	generate_pixel_xy(dst, true);
				782	break;
				783	case FS_OPCODE_PIXEL_Y:
				784	generate_pixel_xy(dst, false);
				785	break;
				786	case FS_OPCODE_CINTERP:
				787	brw_MOV(p, dst, src[0]);
				788	break;
				789	case FS_OPCODE_LINTERP:
				790	generate_linterp(inst, dst, src);
				791	break;
				792	case FS_OPCODE_TEX:
				793	case FS_OPCODE_TXB:
				794	case FS_OPCODE_TXD:
				795	case FS_OPCODE_TXL:
				796	generate_tex(inst, dst, src[0]);
				797	break;
				798	case FS_OPCODE_DISCARD:
				799	generate_discard(inst);
				800	break;
				801	case FS_OPCODE_DDX:
				802	generate_ddx(inst, dst, src[0]);
				803	break;
				804	case FS_OPCODE_DDY:
				805	generate_ddy(inst, dst, src[0]);
				806	break;
				807
				808	case FS_OPCODE_SPILL:
				809	generate_spill(inst, src[0]);
				810	break;
				811
				812	case FS_OPCODE_UNSPILL:
				813	generate_unspill(inst, dst);
				814	break;
				815
				816	case FS_OPCODE_PULL_CONSTANT_LOAD:
				817	generate_pull_constant_load(inst, dst);
				818	break;
				819
				820	case FS_OPCODE_FB_WRITE:
				821	generate_fb_write(inst);
				822	break;
				823	default:
				824	if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
				825	_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
				826	brw_opcodes[inst->opcode].name);
				827	} else {
				828	_mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
				829	}
				830	fail("unsupported opcode in FS\n");
				831	}
				832
				833	if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
				834	for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
				835	if (0) {
				836	printf("0x%08x 0x%08x 0x%08x 0x%08x ",
				837	((uint32_t *)&p->store[i])[3],
				838	((uint32_t *)&p->store[i])[2],
				839	((uint32_t *)&p->store[i])[1],
				840	((uint32_t *)&p->store[i])[0]);
				841	}
				842	brw_disasm(stdout, &p->store[i], intel->gen);
				843	}
				844	}
				845
				846	last_native_inst = p->nr_insn;
				847	}
				848
				849	if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
				850	printf("\n");
				851	}
				852
				853	ralloc_free(loop_stack);
				854	ralloc_free(if_depth_in_loop);
				855
				856	brw_set_uip_jip(p);
				857
				858	/* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
				859	* emit issues, it doesn't get the jump distances into the output,
				860	* which is often something we want to debug. So this is here in
				861	* case you're doing that.
				862	*/
				863	if (0) {
				864	if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
				865	for (unsigned int i = 0; i < p->nr_insn; i++) {
				866	printf("0x%08x 0x%08x 0x%08x 0x%08x ",
				867	((uint32_t *)&p->store[i])[3],
				868	((uint32_t *)&p->store[i])[2],
				869	((uint32_t *)&p->store[i])[1],
				870	((uint32_t *)&p->store[i])[0]);
				871	brw_disasm(stdout, &p->store[i], intel->gen);
				872	}
				873	}
				874	}
				875	}