Blame - src/mesa/shader/prog_execute.c - platform/external/mesa3d

blob: 1b7ed4c5d0e3d6df5d70fc11141cb31ae2405c82 [file] [log] [blame]

Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	* Version: 6.5.3
				4	*
				5	* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a
				8	* copy of this software and associated documentation files (the "Software"),
				9	* to deal in the Software without restriction, including without limitation
				10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				11	* and/or sell copies of the Software, and to permit persons to whom the
				12	* Software is furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice shall be included
				15	* in all copies or substantial portions of the Software.
				16	*
				17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
				18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				20	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
				21	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				22	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				23	*/
				24
				25	/**
				26	* \file prog_execute.c
				27	* Software interpreter for vertex/fragment programs.
				28	* \author Brian Paul
				29	*/
				30
				31	/*
				32	* NOTE: we do everything in single-precision floating point; we don't
				33	* currently observe the single/half/fixed-precision qualifiers.
				34	*
				35	*/
				36
				37
				38	#include "glheader.h"
				39	#include "colormac.h"
				40	#include "context.h"
				41	#include "program.h"
				42	#include "prog_execute.h"
				43	#include "prog_instruction.h"
				44	#include "prog_parameter.h"
				45	#include "prog_print.h"
				46	#include "slang_library_noise.h"
				47
				48
				49	/* See comments below for info about this */
				50	#define LAMBDA_ZERO 1
				51
				52	/* debug predicate */
				53	#define DEBUG_PROG 0
				54
				55
/**
 * Set x to positive or negative infinity.
 *
 * x must be a float lvalue.  On USE_IEEE / _WIN32 builds the single-precision
 * infinity bit pattern is written directly through a GLuint pointer; on the
 * other branches the largest value the platform macro provides is assigned.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Store the raw bit pattern 'bits' into the float lvalue x via fi_type. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
				71
				72
				73	static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
				74
				75
				76
#if FEATURE_MESA_program_debug
/**
 * Machine most recently handed to _mesa_execute_program(); NULL until a
 * program has been run.
 */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (only used for error reporting)
 * \param file   register file to read: input, output or temporary
 * \param index  register index within that file (not range-checked here)
 * \param val    receives the four register components; left untouched when
 *               no machine is active or the register file is unsupported
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (!CurrentMachine)
      return;

   switch (file) {
   case PROGRAM_INPUT:
      /* vertex programs and fragment programs keep inputs in different
       * arrays of the machine state
       */
      if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
         COPY_4V(val, CurrentMachine->VertAttribs[index]);
      }
      else {
         COPY_4V(val,
                 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
      }
      break;
   case PROGRAM_OUTPUT:
      COPY_4V(val, CurrentMachine->Outputs[index]);
      break;
   case PROGRAM_TEMPORARY:
      COPY_4V(val, CurrentMachine->Temporaries[index]);
      break;
   default:
      /* report under this function's real name so the message can be
       * traced back to its source
       */
      _mesa_problem(ctx,
                    "bad register file in _mesa_get_program_register");
      break;
   }
}
#endif /* FEATURE_MESA_program_debug */
				113
				114
				115
				116	/**
				117	* Return a pointer to the 4-element float vector specified by the given
				118	* source register.
				119	*/
				120	static INLINE const GLfloat *
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	121	get_register_pointer(GLcontext * ctx,
				122	const struct prog_src_register *source,
				123	const struct gl_program_machine *machine)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	124	{
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	125	if (source->RelAddr) {
				126	const GLint reg = source->Index + machine->AddressReg[0][0];
				127	ASSERT( (source->File == PROGRAM_ENV_PARAM) \|\|
				128	(source->File == PROGRAM_STATE_VAR) );
				129	if (reg < 0 \|\| reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
				130	return ZeroVec;
				131	else if (source->File == PROGRAM_ENV_PARAM)
				132	return ctx->VertexProgram.Parameters[reg];
				133	else {
Brian	761728a	2007-02-24 11:14:57 -0700	[diff] [blame]	134	ASSERT(source->File == PROGRAM_LOCAL_PARAM \|\|
				135	source->File == PROGRAM_STATE_VAR);
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	136	return machine->CurProgram->Parameters->ParameterValues[reg];
				137	}
				138	}
				139
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	140	switch (source->File) {
				141	case PROGRAM_TEMPORARY:
				142	ASSERT(source->Index < MAX_PROGRAM_TEMPS);
				143	return machine->Temporaries[source->Index];
				144
				145	case PROGRAM_INPUT:
				146	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
				147	ASSERT(source->Index < VERT_ATTRIB_MAX);
				148	return machine->VertAttribs[source->Index];
				149	}
				150	else {
				151	ASSERT(source->Index < FRAG_ATTRIB_MAX);
				152	return machine->Attribs[source->Index][machine->CurElement];
				153	}
				154
				155	case PROGRAM_OUTPUT:
Brian	292a804	2007-02-24 15:49:54 -0700	[diff] [blame]	156	ASSERT(source->Index < MAX_PROGRAM_OUTPUTS);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	157	return machine->Outputs[source->Index];
				158
				159	case PROGRAM_LOCAL_PARAM:
				160	ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
				161	return machine->CurProgram->LocalParams[source->Index];
				162
				163	case PROGRAM_ENV_PARAM:
				164	ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
				165	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
				166	return ctx->VertexProgram.Parameters[source->Index];
				167	else
				168	return ctx->FragmentProgram.Parameters[source->Index];
				169
				170	case PROGRAM_STATE_VAR:
				171	/* Fallthrough */
				172	case PROGRAM_CONSTANT:
				173	/* Fallthrough */
				174	case PROGRAM_UNIFORM:
				175	/* Fallthrough */
				176	case PROGRAM_NAMED_PARAM:
				177	ASSERT(source->Index <
				178	(GLint) machine->CurProgram->Parameters->NumParameters);
				179	return machine->CurProgram->Parameters->ParameterValues[source->Index];
				180
				181	default:
				182	_mesa_problem(ctx,
				183	"Invalid input register file %d in get_register_pointer()",
				184	source->File);
				185	return NULL;
				186	}
				187	}
				188
				189
				190	/**
				191	* Fetch a 4-element float vector from the given source register.
				192	* Apply swizzling and negating as needed.
				193	*/
				194	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	195	fetch_vector4(GLcontext * ctx,
				196	const struct prog_src_register *source,
				197	const struct gl_program_machine *machine, GLfloat result[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	198	{
				199	const GLfloat *src = get_register_pointer(ctx, source, machine);
				200	ASSERT(src);
				201
				202	if (source->Swizzle == SWIZZLE_NOOP) {
				203	/* no swizzling */
				204	COPY_4V(result, src);
				205	}
				206	else {
				207	ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
				208	ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
				209	ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
				210	ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
				211	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				212	result[1] = src[GET_SWZ(source->Swizzle, 1)];
				213	result[2] = src[GET_SWZ(source->Swizzle, 2)];
				214	result[3] = src[GET_SWZ(source->Swizzle, 3)];
				215	}
				216
				217	if (source->NegateBase) {
				218	result[0] = -result[0];
				219	result[1] = -result[1];
				220	result[2] = -result[2];
				221	result[3] = -result[3];
				222	}
				223	if (source->Abs) {
				224	result[0] = FABSF(result[0]);
				225	result[1] = FABSF(result[1]);
				226	result[2] = FABSF(result[2]);
				227	result[3] = FABSF(result[3]);
				228	}
				229	if (source->NegateAbs) {
				230	result[0] = -result[0];
				231	result[1] = -result[1];
				232	result[2] = -result[2];
				233	result[3] = -result[3];
				234	}
				235	}
				236
				237	#if 0
				238	/**
				239	* Fetch the derivative with respect to X for the given register.
				240	* \return GL_TRUE if it was easily computed or GL_FALSE if we
				241	* need to execute another instance of the program (ugh)!
				242	*/
				243	static GLboolean
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	244	fetch_vector4_deriv(GLcontext * ctx,
				245	const struct prog_src_register *source,
				246	const SWspan * span,
				247	char xOrY, GLint column, GLfloat result[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	248	{
				249	GLfloat src[4];
				250
				251	ASSERT(xOrY == 'X' \|\| xOrY == 'Y');
				252
				253	switch (source->Index) {
				254	case FRAG_ATTRIB_WPOS:
				255	if (xOrY == 'X') {
				256	src[0] = 1.0;
				257	src[1] = 0.0;
				258	src[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	259	/ ctx->DrawBuffer->_DepthMaxF;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	260	src[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
				261	}
				262	else {
				263	src[0] = 0.0;
				264	src[1] = 1.0;
				265	src[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	266	/ ctx->DrawBuffer->_DepthMaxF;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	267	src[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
				268	}
				269	break;
				270	case FRAG_ATTRIB_COL0:
				271	case FRAG_ATTRIB_COL1:
				272	if (xOrY == 'X') {
				273	src[0] = span->attrStepX[source->Index][0] * (1.0F / CHAN_MAXF);
				274	src[1] = span->attrStepX[source->Index][1] * (1.0F / CHAN_MAXF);
				275	src[2] = span->attrStepX[source->Index][2] * (1.0F / CHAN_MAXF);
				276	src[3] = span->attrStepX[source->Index][3] * (1.0F / CHAN_MAXF);
				277	}
				278	else {
				279	src[0] = span->attrStepY[source->Index][0] * (1.0F / CHAN_MAXF);
				280	src[1] = span->attrStepY[source->Index][1] * (1.0F / CHAN_MAXF);
				281	src[2] = span->attrStepY[source->Index][2] * (1.0F / CHAN_MAXF);
				282	src[3] = span->attrStepY[source->Index][3] * (1.0F / CHAN_MAXF);
				283	}
				284	break;
				285	case FRAG_ATTRIB_FOGC:
				286	if (xOrY == 'X') {
				287	src[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
				288	src[1] = 0.0;
				289	src[2] = 0.0;
				290	src[3] = 0.0;
				291	}
				292	else {
				293	src[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
				294	src[1] = 0.0;
				295	src[2] = 0.0;
				296	src[3] = 0.0;
				297	}
				298	break;
				299	default:
				300	assert(source->Index < FRAG_ATTRIB_MAX);
				301	/* texcoord or varying */
				302	if (xOrY == 'X') {
				303	/* this is a little tricky - I think I've got it right */
				304	const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	305	+
				306	span->attrStepX[source->Index][3] *
				307	column);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	308	src[0] = span->attrStepX[source->Index][0] * invQ;
				309	src[1] = span->attrStepX[source->Index][1] * invQ;
				310	src[2] = span->attrStepX[source->Index][2] * invQ;
				311	src[3] = span->attrStepX[source->Index][3] * invQ;
				312	}
				313	else {
				314	/* Tricky, as above, but in Y direction */
				315	const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
				316	+ span->attrStepY[source->Index][3]);
				317	src[0] = span->attrStepY[source->Index][0] * invQ;
				318	src[1] = span->attrStepY[source->Index][1] * invQ;
				319	src[2] = span->attrStepY[source->Index][2] * invQ;
				320	src[3] = span->attrStepY[source->Index][3] * invQ;
				321	}
				322	break;
				323	}
				324
				325	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				326	result[1] = src[GET_SWZ(source->Swizzle, 1)];
				327	result[2] = src[GET_SWZ(source->Swizzle, 2)];
				328	result[3] = src[GET_SWZ(source->Swizzle, 3)];
				329
				330	if (source->NegateBase) {
				331	result[0] = -result[0];
				332	result[1] = -result[1];
				333	result[2] = -result[2];
				334	result[3] = -result[3];
				335	}
				336	if (source->Abs) {
				337	result[0] = FABSF(result[0]);
				338	result[1] = FABSF(result[1]);
				339	result[2] = FABSF(result[2]);
				340	result[3] = FABSF(result[3]);
				341	}
				342	if (source->NegateAbs) {
				343	result[0] = -result[0];
				344	result[1] = -result[1];
				345	result[2] = -result[2];
				346	result[3] = -result[3];
				347	}
				348	return GL_TRUE;
				349	}
				350	#endif
				351
				352
				353	/**
				354	* As above, but only return result[0] element.
				355	*/
				356	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	357	fetch_vector1(GLcontext * ctx,
				358	const struct prog_src_register *source,
				359	const struct gl_program_machine *machine, GLfloat result[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	360	{
				361	const GLfloat *src = get_register_pointer(ctx, source, machine);
				362	ASSERT(src);
				363
				364	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				365
				366	if (source->NegateBase) {
				367	result[0] = -result[0];
				368	}
				369	if (source->Abs) {
				370	result[0] = FABSF(result[0]);
				371	}
				372	if (source->NegateAbs) {
				373	result[0] = -result[0];
				374	}
				375	}
				376
				377
				378	/**
				379	* Test value against zero and return GT, LT, EQ or UN if NaN.
				380	*/
				381	static INLINE GLuint
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	382	generate_cc(float value)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	383	{
				384	if (value != value)
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	385	return COND_UN; /* NaN */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	386	if (value > 0.0F)
				387	return COND_GT;
				388	if (value < 0.0F)
				389	return COND_LT;
				390	return COND_EQ;
				391	}
				392
				393
				394	/**
				395	* Test if the ccMaskRule is satisfied by the given condition code.
				396	* Used to mask destination writes according to the current condition code.
				397	*/
				398	static INLINE GLboolean
				399	test_cc(GLuint condCode, GLuint ccMaskRule)
				400	{
				401	switch (ccMaskRule) {
				402	case COND_EQ: return (condCode == COND_EQ);
				403	case COND_NE: return (condCode != COND_EQ);
				404	case COND_LT: return (condCode == COND_LT);
				405	case COND_GE: return (condCode == COND_GT \|\| condCode == COND_EQ);
				406	case COND_LE: return (condCode == COND_LT \|\| condCode == COND_EQ);
				407	case COND_GT: return (condCode == COND_GT);
				408	case COND_TR: return GL_TRUE;
				409	case COND_FL: return GL_FALSE;
				410	default: return GL_TRUE;
				411	}
				412	}
				413
				414
				415	/**
				416	* Evaluate the 4 condition codes against a predicate and return GL_TRUE
				417	* or GL_FALSE to indicate result.
				418	*/
				419	static INLINE GLboolean
				420	eval_condition(const struct gl_program_machine *machine,
				421	const struct prog_instruction *inst)
				422	{
				423	const GLuint swizzle = inst->DstReg.CondSwizzle;
				424	const GLuint condMask = inst->DstReg.CondMask;
				425	if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) \|\|
				426	test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) \|\|
				427	test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) \|\|
				428	test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
				429	return GL_TRUE;
				430	}
				431	else {
				432	return GL_FALSE;
				433	}
				434	}
				435
				436
				437
				438	/**
				439	* Store 4 floats into a register. Observe the instructions saturate and
				440	* set-condition-code flags.
				441	*/
				442	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	443	store_vector4(const struct prog_instruction *inst,
				444	struct gl_program_machine *machine, const GLfloat value[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	445	{
				446	const struct prog_dst_register *dest = &(inst->DstReg);
				447	const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
				448	GLfloat *dstReg;
				449	GLfloat dummyReg[4];
				450	GLfloat clampedValue[4];
				451	GLuint writeMask = dest->WriteMask;
				452
				453	switch (dest->File) {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	454	case PROGRAM_OUTPUT:
Brian	292a804	2007-02-24 15:49:54 -0700	[diff] [blame]	455	ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	456	dstReg = machine->Outputs[dest->Index];
				457	break;
				458	case PROGRAM_TEMPORARY:
Brian	292a804	2007-02-24 15:49:54 -0700	[diff] [blame]	459	ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	460	dstReg = machine->Temporaries[dest->Index];
				461	break;
				462	case PROGRAM_WRITE_ONLY:
				463	dstReg = dummyReg;
				464	return;
				465	default:
				466	_mesa_problem(NULL, "bad register file in store_vector4(fp)");
				467	return;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	468	}
				469
				470	#if 0
				471	if (value[0] > 1.0e10 \|\|
				472	IS_INF_OR_NAN(value[0]) \|\|
				473	IS_INF_OR_NAN(value[1]) \|\|
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	474	IS_INF_OR_NAN(value[2]) \|\| IS_INF_OR_NAN(value[3]))
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	475	printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
				476	#endif
				477
				478	if (clamp) {
				479	clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
				480	clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
				481	clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
				482	clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
				483	value = clampedValue;
				484	}
				485
				486	if (dest->CondMask != COND_TR) {
				487	/* condition codes may turn off some writes */
				488	if (writeMask & WRITEMASK_X) {
				489	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
				490	dest->CondMask))
				491	writeMask &= ~WRITEMASK_X;
				492	}
				493	if (writeMask & WRITEMASK_Y) {
				494	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
				495	dest->CondMask))
				496	writeMask &= ~WRITEMASK_Y;
				497	}
				498	if (writeMask & WRITEMASK_Z) {
				499	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
				500	dest->CondMask))
				501	writeMask &= ~WRITEMASK_Z;
				502	}
				503	if (writeMask & WRITEMASK_W) {
				504	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
				505	dest->CondMask))
				506	writeMask &= ~WRITEMASK_W;
				507	}
				508	}
				509
				510	if (writeMask & WRITEMASK_X)
				511	dstReg[0] = value[0];
				512	if (writeMask & WRITEMASK_Y)
				513	dstReg[1] = value[1];
				514	if (writeMask & WRITEMASK_Z)
				515	dstReg[2] = value[2];
				516	if (writeMask & WRITEMASK_W)
				517	dstReg[3] = value[3];
				518
				519	if (inst->CondUpdate) {
				520	if (writeMask & WRITEMASK_X)
				521	machine->CondCodes[0] = generate_cc(value[0]);
				522	if (writeMask & WRITEMASK_Y)
				523	machine->CondCodes[1] = generate_cc(value[1]);
				524	if (writeMask & WRITEMASK_Z)
				525	machine->CondCodes[2] = generate_cc(value[2]);
				526	if (writeMask & WRITEMASK_W)
				527	machine->CondCodes[3] = generate_cc(value[3]);
				528	}
				529	}
				530
				531
				532	#if 0
				533	/**
				534	* Initialize a new machine state instance from an existing one, adding
				535	* the partial derivatives onto the input registers.
				536	* Used to implement DDX and DDY instructions in non-trivial cases.
				537	*/
				538	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	539	init_machine_deriv(GLcontext * ctx,
				540	const struct gl_program_machine *machine,
				541	const struct gl_fragment_program *program,
				542	const SWspan * span, char xOrY,
				543	struct gl_program_machine *dMachine)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	544	{
				545	GLuint attr;
				546
				547	ASSERT(xOrY == 'X' \|\| xOrY == 'Y');
				548
				549	/* copy existing machine */
				550	_mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));
				551
				552	if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
				553	/* XXX also need to do this when using valgrind */
				554	/* Clear temporary registers (undefined for ARB_f_p) */
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	555	_mesa_bzero((void *) machine->Temporaries,
				556	MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	557	}
				558
				559	/* Add derivatives */
				560	if (program->Base.InputsRead & FRAG_BIT_WPOS) {
				561	GLfloat *wpos = machine->Attribs[FRAG_ATTRIB_WPOS][machine->CurElement];
				562	if (xOrY == 'X') {
				563	wpos[0] += 1.0F;
				564	wpos[1] += 0.0F;
				565	wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
				566	wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
				567	}
				568	else {
				569	wpos[0] += 0.0F;
				570	wpos[1] += 1.0F;
				571	wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
				572	wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
				573	}
				574	}
				575
				576	/* primary, secondary colors */
				577	for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
				578	if (program->Base.InputsRead & (1 << attr)) {
				579	GLfloat *col = machine->Attribs[attr][machine->CurElement];
				580	if (xOrY == 'X') {
				581	col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
				582	col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
				583	col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
				584	col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
				585	}
				586	else {
				587	col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
				588	col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
				589	col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
				590	col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
				591	}
				592	}
				593	}
				594	if (program->Base.InputsRead & FRAG_BIT_FOGC) {
				595	GLfloat *fogc = machine->Attribs[FRAG_ATTRIB_FOGC][machine->CurElement];
				596	if (xOrY == 'X') {
				597	fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
				598	}
				599	else {
				600	fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
				601	}
				602	}
				603	/* texcoord and varying vars */
				604	for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
				605	if (program->Base.InputsRead & (1 << attr)) {
				606	GLfloat *val = machine->Attribs[attr][machine->CurElement];
				607	/* XXX perspective-correct interpolation */
				608	if (xOrY == 'X') {
				609	val[0] += span->attrStepX[attr][0];
				610	val[1] += span->attrStepX[attr][1];
				611	val[2] += span->attrStepX[attr][2];
				612	val[3] += span->attrStepX[attr][3];
				613	}
				614	else {
				615	val[0] += span->attrStepY[attr][0];
				616	val[1] += span->attrStepY[attr][1];
				617	val[2] += span->attrStepY[attr][2];
				618	val[3] += span->attrStepY[attr][3];
				619	}
				620	}
				621	}
				622
				623	/* init condition codes */
				624	dMachine->CondCodes[0] = COND_EQ;
				625	dMachine->CondCodes[1] = COND_EQ;
				626	dMachine->CondCodes[2] = COND_EQ;
				627	dMachine->CondCodes[3] = COND_EQ;
				628	}
				629	#endif
				630
				631
				632	/**
				633	* Execute the given vertex/fragment program.
				634	*
				635	* \param ctx - rendering context
				636	* \param program - the fragment program to execute
				637	* \param machine - machine state (register file)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	638	* \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
				639	*/
				640	GLboolean
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	641	_mesa_execute_program(GLcontext * ctx,
Brian	8b34b7d	2007-02-25 18:26:50 -0700	[diff] [blame]	642	const struct gl_program *program,
Brian	085d7d5	2007-02-25 18:23:37 -0700	[diff] [blame]	643	struct gl_program_machine *machine)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	644	{
Brian	8b34b7d	2007-02-25 18:26:50 -0700	[diff] [blame]	645	const GLuint numInst = program->NumInstructions;
Brian	cfd0011	2007-02-25 18:30:45 -0700	[diff] [blame]	646	const GLuint maxExec = 10000;
				647	GLint pc, numExec = 0;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	648
				649	machine->CurProgram = program;
				650
				651	if (DEBUG_PROG) {
				652	printf("execute program %u --------------------\n", program->Id);
				653	}
				654
				655	#if FEATURE_MESA_program_debug
				656	CurrentMachine = machine;
				657	#endif
				658
Brian	8b34b7d	2007-02-25 18:26:50 -0700	[diff] [blame]	659	for (pc = 0; pc < numInst; pc++) {
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	660	const struct prog_instruction *inst = program->Instructions + pc;
				661
				662	#if FEATURE_MESA_program_debug
				663	if (ctx->FragmentProgram.CallbackEnabled &&
				664	ctx->FragmentProgram.Callback) {
				665	ctx->FragmentProgram.CurrentPosition = inst->StringPos;
				666	ctx->FragmentProgram.Callback(program->Target,
				667	ctx->FragmentProgram.CallbackData);
				668	}
				669	#endif
				670
				671	if (DEBUG_PROG) {
				672	_mesa_print_instruction(inst);
				673	}
				674
				675	switch (inst->Opcode) {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	676	case OPCODE_ABS:
				677	{
				678	GLfloat a[4], result[4];
				679	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				680	result[0] = FABSF(a[0]);
				681	result[1] = FABSF(a[1]);
				682	result[2] = FABSF(a[2]);
				683	result[3] = FABSF(a[3]);
				684	store_vector4(inst, machine, result);
				685	}
				686	break;
				687	case OPCODE_ADD:
				688	{
				689	GLfloat a[4], b[4], result[4];
				690	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				691	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				692	result[0] = a[0] + b[0];
				693	result[1] = a[1] + b[1];
				694	result[2] = a[2] + b[2];
				695	result[3] = a[3] + b[3];
				696	store_vector4(inst, machine, result);
				697	if (DEBUG_PROG) {
				698	printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
				699	result[0], result[1], result[2], result[3],
				700	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	701	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	702	}
				703	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	704	case OPCODE_ARL:
				705	{
				706	GLfloat t[4];
				707	fetch_vector4(ctx, &inst->SrcReg[0], machine, t);
				708	machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
				709	}
				710	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	711	case OPCODE_BGNLOOP:
				712	/* no-op */
				713	break;
				714	case OPCODE_ENDLOOP:
				715	/* subtract 1 here since pc is incremented by for(pc) loop */
				716	pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
				717	break;
				718	case OPCODE_BGNSUB: /* begin subroutine */
				719	break;
				720	case OPCODE_ENDSUB: /* end subroutine */
				721	break;
				722	case OPCODE_BRA: /* branch (conditional) */
				723	/* fall-through */
				724	case OPCODE_BRK: /* break out of loop (conditional) */
				725	/* fall-through */
				726	case OPCODE_CONT: /* continue loop (conditional) */
				727	if (eval_condition(machine, inst)) {
				728	/* take branch */
				729	/* Subtract 1 here since we'll do pc++ at end of for-loop */
				730	pc = inst->BranchTarget - 1;
				731	}
				732	break;
				733	case OPCODE_CAL: /* Call subroutine (conditional) */
				734	if (eval_condition(machine, inst)) {
				735	/* call the subroutine */
				736	if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
				737	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	738	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	739	machine->CallStack[machine->StackDepth++] = pc + 1;
				740	pc = inst->BranchTarget; /* XXX - 1 ??? */
				741	}
				742	break;
				743	case OPCODE_CMP:
				744	{
				745	GLfloat a[4], b[4], c[4], result[4];
				746	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				747	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				748	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				749	result[0] = a[0] < 0.0F ? b[0] : c[0];
				750	result[1] = a[1] < 0.0F ? b[1] : c[1];
				751	result[2] = a[2] < 0.0F ? b[2] : c[2];
				752	result[3] = a[3] < 0.0F ? b[3] : c[3];
				753	store_vector4(inst, machine, result);
				754	}
				755	break;
				756	case OPCODE_COS:
				757	{
				758	GLfloat a[4], result[4];
				759	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				760	result[0] = result[1] = result[2] = result[3]
				761	= (GLfloat) _mesa_cos(a[0]);
				762	store_vector4(inst, machine, result);
				763	}
				764	break;
				765	case OPCODE_DDX: /* Partial derivative with respect to X */
				766	{
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	767	#if 0
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	768	GLfloat a[4], aNext[4], result[4];
				769	struct gl_program_machine dMachine;
				770	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
				771	column, result)) {
				772	/* This is tricky. Make a copy of the current machine state,
				773	* increment the input registers by the dx or dy partial
				774	* derivatives, then re-execute the program up to the
				775	* preceeding instruction, then fetch the source register.
				776	* Finally, find the difference in the register values for
				777	* the original and derivative runs.
				778	*/
				779	fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
				780	init_machine_deriv(ctx, machine, program, span,
				781	'X', &dMachine);
				782	execute_program(ctx, program, pc, &dMachine, span, column);
				783	fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
				784	aNext);
				785	result[0] = aNext[0] - a[0];
				786	result[1] = aNext[1] - a[1];
				787	result[2] = aNext[2] - a[2];
				788	result[3] = aNext[3] - a[3];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	789	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	790	store_vector4(inst, machine, result);
				791	#else
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	792	store_vector4(inst, machine, ZeroVec);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	793	#endif
				794	}
				795	break;
				796	case OPCODE_DDY: /* Partial derivative with respect to Y */
				797	{
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	798	#if 0
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	799	GLfloat a[4], aNext[4], result[4];
				800	struct gl_program_machine dMachine;
				801	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
				802	column, result)) {
				803	init_machine_deriv(ctx, machine, program, span,
				804	'Y', &dMachine);
				805	fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
				806	execute_program(ctx, program, pc, &dMachine, span, column);
				807	fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
				808	aNext);
				809	result[0] = aNext[0] - a[0];
				810	result[1] = aNext[1] - a[1];
				811	result[2] = aNext[2] - a[2];
				812	result[3] = aNext[3] - a[3];
				813	}
				814	store_vector4(inst, machine, result);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	815	#else
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	816	store_vector4(inst, machine, ZeroVec);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	817	#endif
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	818	}
				819	break;
				820	case OPCODE_DP3:
				821	{
				822	GLfloat a[4], b[4], result[4];
				823	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				824	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				825	result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
				826	store_vector4(inst, machine, result);
				827	if (DEBUG_PROG) {
				828	printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
				829	result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	830	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	831	}
				832	break;
				833	case OPCODE_DP4:
				834	{
				835	GLfloat a[4], b[4], result[4];
				836	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				837	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				838	result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
				839	store_vector4(inst, machine, result);
				840	if (DEBUG_PROG) {
				841	printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
				842	result[0], a[0], a[1], a[2], a[3],
				843	b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	844	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	845	}
				846	break;
				847	case OPCODE_DPH:
				848	{
				849	GLfloat a[4], b[4], result[4];
				850	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				851	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				852	result[0] = result[1] = result[2] = result[3] =
				853	a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
				854	store_vector4(inst, machine, result);
				855	}
				856	break;
				857	case OPCODE_DST: /* Distance vector */
				858	{
				859	GLfloat a[4], b[4], result[4];
				860	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				861	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				862	result[0] = 1.0F;
				863	result[1] = a[1] * b[1];
				864	result[2] = a[2];
				865	result[3] = b[3];
				866	store_vector4(inst, machine, result);
				867	}
				868	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	869	case OPCODE_EXP:
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	870	{
				871	GLfloat t[4], q[4], floor_t0;
				872	fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
				873	floor_t0 = FLOORF(t[0]);
				874	if (floor_t0 > FLT_MAX_EXP) {
				875	SET_POS_INFINITY(q[0]);
				876	SET_POS_INFINITY(q[2]);
				877	}
				878	else if (floor_t0 < FLT_MIN_EXP) {
				879	q[0] = 0.0F;
				880	q[2] = 0.0F;
				881	}
				882	else {
Brian	761728a	2007-02-24 11:14:57 -0700	[diff] [blame]	883	q[0] = LDEXPF(1.0, (int) floor_t0);
				884	/* Note: GL_NV_vertex_program expects
				885	* result.z = result.x * APPX(result.y)
				886	* We do what the ARB extension says.
				887	*/
				888	q[2] = pow(2.0, t[0]);
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	889	}
				890	q[1] = t[0] - floor_t0;
				891	q[3] = 1.0F;
				892	store_vector4( inst, machine, q );
				893	}
				894	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	895	case OPCODE_EX2: /* Exponential base 2 */
				896	{
				897	GLfloat a[4], result[4];
				898	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				899	result[0] = result[1] = result[2] = result[3] =
				900	(GLfloat) _mesa_pow(2.0, a[0]);
				901	store_vector4(inst, machine, result);
				902	}
				903	break;
				904	case OPCODE_FLR:
				905	{
				906	GLfloat a[4], result[4];
				907	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				908	result[0] = FLOORF(a[0]);
				909	result[1] = FLOORF(a[1]);
				910	result[2] = FLOORF(a[2]);
				911	result[3] = FLOORF(a[3]);
				912	store_vector4(inst, machine, result);
				913	}
				914	break;
				915	case OPCODE_FRC:
				916	{
				917	GLfloat a[4], result[4];
				918	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				919	result[0] = a[0] - FLOORF(a[0]);
				920	result[1] = a[1] - FLOORF(a[1]);
				921	result[2] = a[2] - FLOORF(a[2]);
				922	result[3] = a[3] - FLOORF(a[3]);
				923	store_vector4(inst, machine, result);
				924	}
				925	break;
				926	case OPCODE_IF:
				927	if (eval_condition(machine, inst)) {
				928	/* do if-clause (just continue execution) */
				929	}
				930	else {
				931	/* go to the instruction after ELSE or ENDIF */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	932	assert(inst->BranchTarget >= 0);
				933	pc = inst->BranchTarget - 1;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	934	}
				935	break;
				936	case OPCODE_ELSE:
				937	/* goto ENDIF */
				938	assert(inst->BranchTarget >= 0);
				939	pc = inst->BranchTarget - 1;
				940	break;
				941	case OPCODE_ENDIF:
				942	/* nothing */
				943	break;
				944	case OPCODE_INT: /* float to int */
				945	{
				946	GLfloat a[4], result[4];
				947	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				948	result[0] = (GLfloat) (GLint) a[0];
				949	result[1] = (GLfloat) (GLint) a[1];
				950	result[2] = (GLfloat) (GLint) a[2];
				951	result[3] = (GLfloat) (GLint) a[3];
				952	store_vector4(inst, machine, result);
				953	}
				954	break;
				955	case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
				956	if (eval_condition(machine, inst)) {
				957	return GL_FALSE;
				958	}
				959	break;
				960	case OPCODE_KIL: /* ARB_f_p only */
				961	{
				962	GLfloat a[4];
				963	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				964	if (a[0] < 0.0F \|\| a[1] < 0.0F \|\| a[2] < 0.0F \|\| a[3] < 0.0F) {
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	965	return GL_FALSE;
				966	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	967	}
				968	break;
				969	case OPCODE_LG2: /* log base 2 */
				970	{
				971	GLfloat a[4], result[4];
				972	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				973	result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
				974	store_vector4(inst, machine, result);
				975	}
				976	break;
				977	case OPCODE_LIT:
				978	{
				979	const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
				980	GLfloat a[4], result[4];
				981	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				982	a[0] = MAX2(a[0], 0.0F);
				983	a[1] = MAX2(a[1], 0.0F);
				984	/* XXX ARB version clamps a[3], NV version doesn't */
				985	a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
				986	result[0] = 1.0F;
				987	result[1] = a[0];
				988	/* XXX we could probably just use pow() here */
				989	if (a[0] > 0.0F) {
				990	if (a[1] == 0.0 && a[3] == 0.0)
				991	result[2] = 1.0;
				992	else
				993	result[2] = EXPF(a[3] * LOGF(a[1]));
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	994	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	995	else {
				996	result[2] = 0.0;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	997	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	998	result[3] = 1.0F;
				999	store_vector4(inst, machine, result);
				1000	if (DEBUG_PROG) {
				1001	printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
				1002	result[0], result[1], result[2], result[3],
				1003	a[0], a[1], a[2], a[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1004	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1005	}
				1006	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	1007	case OPCODE_LOG:
				1008	{
				1009	GLfloat t[4], q[4], abs_t0;
				1010	fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
				1011	abs_t0 = FABSF(t[0]);
				1012	if (abs_t0 != 0.0F) {
				1013	/* Since we really can't handle infinite values on VMS
				1014	* like other OSes we'll use __MAXFLOAT to represent
				1015	* infinity. This may need some tweaking.
				1016	*/
				1017	#ifdef VMS
				1018	if (abs_t0 == __MAXFLOAT)
				1019	#else
				1020	if (IS_INF_OR_NAN(abs_t0))
				1021	#endif
				1022	{
				1023	SET_POS_INFINITY(q[0]);
				1024	q[1] = 1.0F;
				1025	SET_POS_INFINITY(q[2]);
				1026	}
				1027	else {
				1028	int exponent;
				1029	GLfloat mantissa = FREXPF(t[0], &exponent);
				1030	q[0] = (GLfloat) (exponent - 1);
				1031	q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
				1032	q[2] = (GLfloat) (q[0] + LOG2(q[1]));
				1033	}
				1034	}
				1035	else {
				1036	SET_NEG_INFINITY(q[0]);
				1037	q[1] = 1.0F;
				1038	SET_NEG_INFINITY(q[2]);
				1039	}
				1040	q[3] = 1.0;
				1041	store_vector4(inst, machine, q);
				1042	}
				1043	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1044	case OPCODE_LRP:
				1045	{
				1046	GLfloat a[4], b[4], c[4], result[4];
				1047	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1048	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1049	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1050	result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
				1051	result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
				1052	result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
				1053	result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
				1054	store_vector4(inst, machine, result);
				1055	if (DEBUG_PROG) {
				1056	printf("LRP (%g %g %g %g) = (%g %g %g %g), "
				1057	"(%g %g %g %g), (%g %g %g %g)\n",
				1058	result[0], result[1], result[2], result[3],
				1059	a[0], a[1], a[2], a[3],
				1060	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1061	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1062	}
				1063	break;
				1064	case OPCODE_MAD:
				1065	{
				1066	GLfloat a[4], b[4], c[4], result[4];
				1067	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1068	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1069	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1070	result[0] = a[0] * b[0] + c[0];
				1071	result[1] = a[1] * b[1] + c[1];
				1072	result[2] = a[2] * b[2] + c[2];
				1073	result[3] = a[3] * b[3] + c[3];
				1074	store_vector4(inst, machine, result);
				1075	if (DEBUG_PROG) {
				1076	printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
				1077	"(%g %g %g %g) + (%g %g %g %g)\n",
				1078	result[0], result[1], result[2], result[3],
				1079	a[0], a[1], a[2], a[3],
				1080	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1081	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1082	}
				1083	break;
				1084	case OPCODE_MAX:
				1085	{
				1086	GLfloat a[4], b[4], result[4];
				1087	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1088	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1089	result[0] = MAX2(a[0], b[0]);
				1090	result[1] = MAX2(a[1], b[1]);
				1091	result[2] = MAX2(a[2], b[2]);
				1092	result[3] = MAX2(a[3], b[3]);
				1093	store_vector4(inst, machine, result);
				1094	if (DEBUG_PROG) {
				1095	printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
				1096	result[0], result[1], result[2], result[3],
				1097	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1098	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1099	}
				1100	break;
				1101	case OPCODE_MIN:
				1102	{
				1103	GLfloat a[4], b[4], result[4];
				1104	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1105	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1106	result[0] = MIN2(a[0], b[0]);
				1107	result[1] = MIN2(a[1], b[1]);
				1108	result[2] = MIN2(a[2], b[2]);
				1109	result[3] = MIN2(a[3], b[3]);
				1110	store_vector4(inst, machine, result);
				1111	}
				1112	break;
				1113	case OPCODE_MOV:
				1114	{
				1115	GLfloat result[4];
				1116	fetch_vector4(ctx, &inst->SrcReg[0], machine, result);
				1117	store_vector4(inst, machine, result);
				1118	if (DEBUG_PROG) {
				1119	printf("MOV (%g %g %g %g)\n",
				1120	result[0], result[1], result[2], result[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1121	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1122	}
				1123	break;
				1124	case OPCODE_MUL:
				1125	{
				1126	GLfloat a[4], b[4], result[4];
				1127	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1128	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1129	result[0] = a[0] * b[0];
				1130	result[1] = a[1] * b[1];
				1131	result[2] = a[2] * b[2];
				1132	result[3] = a[3] * b[3];
				1133	store_vector4(inst, machine, result);
				1134	if (DEBUG_PROG) {
				1135	printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
				1136	result[0], result[1], result[2], result[3],
				1137	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1138	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1139	}
				1140	break;
				1141	case OPCODE_NOISE1:
				1142	{
				1143	GLfloat a[4], result[4];
				1144	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1145	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1146	result[1] =
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1147	result[2] = result[3] = _slang_library_noise1(a[0]);
				1148	store_vector4(inst, machine, result);
				1149	}
				1150	break;
				1151	case OPCODE_NOISE2:
				1152	{
				1153	GLfloat a[4], result[4];
				1154	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1155	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1156	result[1] =
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1157	result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
				1158	store_vector4(inst, machine, result);
				1159	}
				1160	break;
				1161	case OPCODE_NOISE3:
				1162	{
				1163	GLfloat a[4], result[4];
				1164	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1165	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1166	result[1] =
				1167	result[2] =
				1168	result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1169	store_vector4(inst, machine, result);
				1170	}
				1171	break;
				1172	case OPCODE_NOISE4:
				1173	{
				1174	GLfloat a[4], result[4];
				1175	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1176	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1177	result[1] =
				1178	result[2] =
				1179	result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1180	store_vector4(inst, machine, result);
				1181	}
				1182	break;
				1183	case OPCODE_NOP:
				1184	break;
				1185	case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
				1186	{
				1187	GLfloat a[4], result[4];
				1188	GLhalfNV hx, hy;
				1189	GLuint rawResult = (GLuint ) result;
				1190	GLuint twoHalves;
				1191	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1192	hx = _mesa_float_to_half(a[0]);
				1193	hy = _mesa_float_to_half(a[1]);
				1194	twoHalves = hx \| (hy << 16);
				1195	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1196	= twoHalves;
				1197	store_vector4(inst, machine, result);
				1198	}
				1199	break;
				1200	case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
				1201	{
				1202	GLfloat a[4], result[4];
				1203	GLuint usx, usy, rawResult = (GLuint ) result;
				1204	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1205	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1206	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1207	usx = IROUND(a[0] * 65535.0F);
				1208	usy = IROUND(a[1] * 65535.0F);
				1209	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1210	= usx \| (usy << 16);
				1211	store_vector4(inst, machine, result);
				1212	}
				1213	break;
				1214	case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
				1215	{
				1216	GLfloat a[4], result[4];
				1217	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
				1218	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1219	a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
				1220	a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
				1221	a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
				1222	a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
				1223	ubx = IROUND(127.0F * a[0] + 128.0F);
				1224	uby = IROUND(127.0F * a[1] + 128.0F);
				1225	ubz = IROUND(127.0F * a[2] + 128.0F);
				1226	ubw = IROUND(127.0F * a[3] + 128.0F);
				1227	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1228	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1229	store_vector4(inst, machine, result);
				1230	}
				1231	break;
				1232	case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
				1233	{
				1234	GLfloat a[4], result[4];
				1235	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
				1236	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1237	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1238	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1239	a[2] = CLAMP(a[2], 0.0F, 1.0F);
				1240	a[3] = CLAMP(a[3], 0.0F, 1.0F);
				1241	ubx = IROUND(255.0F * a[0]);
				1242	uby = IROUND(255.0F * a[1]);
				1243	ubz = IROUND(255.0F * a[2]);
				1244	ubw = IROUND(255.0F * a[3]);
				1245	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1246	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1247	store_vector4(inst, machine, result);
				1248	}
				1249	break;
				1250	case OPCODE_POW:
				1251	{
				1252	GLfloat a[4], b[4], result[4];
				1253	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1254	fetch_vector1(ctx, &inst->SrcReg[1], machine, b);
				1255	result[0] = result[1] = result[2] = result[3]
				1256	= (GLfloat) _mesa_pow(a[0], b[0]);
				1257	store_vector4(inst, machine, result);
				1258	}
				1259	break;
				1260	case OPCODE_RCP:
				1261	{
				1262	GLfloat a[4], result[4];
				1263	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1264	if (DEBUG_PROG) {
				1265	if (a[0] == 0)
				1266	printf("RCP(0)\n");
				1267	else if (IS_INF_OR_NAN(a[0]))
				1268	printf("RCP(inf)\n");
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1269	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1270	result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
				1271	store_vector4(inst, machine, result);
				1272	}
				1273	break;
				1274	case OPCODE_RET: /* return from subroutine (conditional) */
				1275	if (eval_condition(machine, inst)) {
				1276	if (machine->StackDepth == 0) {
				1277	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1278	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1279	pc = machine->CallStack[--machine->StackDepth];
				1280	}
				1281	break;
				1282	case OPCODE_RFL: /* reflection vector */
				1283	{
				1284	GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
				1285	fetch_vector4(ctx, &inst->SrcReg[0], machine, axis);
				1286	fetch_vector4(ctx, &inst->SrcReg[1], machine, dir);
				1287	tmpW = DOT3(axis, axis);
				1288	tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
				1289	result[0] = tmpX * axis[0] - dir[0];
				1290	result[1] = tmpX * axis[1] - dir[1];
				1291	result[2] = tmpX * axis[2] - dir[2];
				1292	/* result[3] is never written! XXX enforce in parser! */
				1293	store_vector4(inst, machine, result);
				1294	}
				1295	break;
				1296	case OPCODE_RSQ: /* 1 / sqrt() */
				1297	{
				1298	GLfloat a[4], result[4];
				1299	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1300	a[0] = FABSF(a[0]);
				1301	result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
				1302	store_vector4(inst, machine, result);
				1303	if (DEBUG_PROG) {
				1304	printf("RSQ %g = 1/sqrt(\|%g\|)\n", result[0], a[0]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1305	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1306	}
				1307	break;
				1308	case OPCODE_SCS: /* sine and cos */
				1309	{
				1310	GLfloat a[4], result[4];
				1311	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1312	result[0] = (GLfloat) _mesa_cos(a[0]);
				1313	result[1] = (GLfloat) _mesa_sin(a[0]);
				1314	result[2] = 0.0; /* undefined! */
				1315	result[3] = 0.0; /* undefined! */
				1316	store_vector4(inst, machine, result);
				1317	}
				1318	break;
				1319	case OPCODE_SEQ: /* set on equal */
				1320	{
				1321	GLfloat a[4], b[4], result[4];
				1322	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1323	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1324	result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
				1325	result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
				1326	result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
				1327	result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
				1328	store_vector4(inst, machine, result);
				1329	}
				1330	break;
				1331	case OPCODE_SFL: /* set false, operands ignored */
				1332	{
				1333	static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
				1334	store_vector4(inst, machine, result);
				1335	}
				1336	break;
				1337	case OPCODE_SGE: /* set on greater or equal */
				1338	{
				1339	GLfloat a[4], b[4], result[4];
				1340	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1341	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1342	result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
				1343	result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
				1344	result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
				1345	result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
				1346	store_vector4(inst, machine, result);
				1347	}
				1348	break;
				1349	case OPCODE_SGT: /* set on greater */
				1350	{
				1351	GLfloat a[4], b[4], result[4];
				1352	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1353	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1354	result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
				1355	result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
				1356	result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
				1357	result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
				1358	store_vector4(inst, machine, result);
				1359	if (DEBUG_PROG) {
				1360	printf("SGT %g %g %g %g\n",
				1361	result[0], result[1], result[2], result[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1362	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1363	}
				1364	break;
				1365	case OPCODE_SIN:
				1366	{
				1367	GLfloat a[4], result[4];
				1368	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1369	result[0] = result[1] = result[2] = result[3]
				1370	= (GLfloat) _mesa_sin(a[0]);
				1371	store_vector4(inst, machine, result);
				1372	}
				1373	break;
				1374	case OPCODE_SLE: /* set on less or equal */
				1375	{
				1376	GLfloat a[4], b[4], result[4];
				1377	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1378	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1379	result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
				1380	result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
				1381	result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
				1382	result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
				1383	store_vector4(inst, machine, result);
				1384	}
				1385	break;
				1386	case OPCODE_SLT: /* set on less */
				1387	{
				1388	GLfloat a[4], b[4], result[4];
				1389	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1390	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1391	result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
				1392	result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
				1393	result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
				1394	result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
				1395	store_vector4(inst, machine, result);
				1396	}
				1397	break;
				1398	case OPCODE_SNE: /* set on not equal */
				1399	{
				1400	GLfloat a[4], b[4], result[4];
				1401	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1402	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1403	result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
				1404	result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
				1405	result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
				1406	result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
				1407	store_vector4(inst, machine, result);
				1408	}
				1409	break;
				1410	case OPCODE_STR: /* set true, operands ignored */
				1411	{
				1412	static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
				1413	store_vector4(inst, machine, result);
				1414	}
				1415	break;
				1416	case OPCODE_SUB:
				1417	{
				1418	GLfloat a[4], b[4], result[4];
				1419	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1420	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1421	result[0] = a[0] - b[0];
				1422	result[1] = a[1] - b[1];
				1423	result[2] = a[2] - b[2];
				1424	result[3] = a[3] - b[3];
				1425	store_vector4(inst, machine, result);
				1426	if (DEBUG_PROG) {
				1427	printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
				1428	result[0], result[1], result[2], result[3],
				1429	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1430	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1431	}
				1432	break;
				1433	case OPCODE_SWZ: /* extended swizzle */
				1434	{
				1435	const struct prog_src_register *source = &inst->SrcReg[0];
				1436	const GLfloat *src = get_register_pointer(ctx, source, machine);
				1437	GLfloat result[4];
				1438	GLuint i;
				1439	for (i = 0; i < 4; i++) {
				1440	const GLuint swz = GET_SWZ(source->Swizzle, i);
				1441	if (swz == SWIZZLE_ZERO)
				1442	result[i] = 0.0;
				1443	else if (swz == SWIZZLE_ONE)
				1444	result[i] = 1.0;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1445	else {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1446	ASSERT(swz >= 0);
				1447	ASSERT(swz <= 3);
				1448	result[i] = src[swz];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1449	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1450	if (source->NegateBase & (1 << i))
				1451	result[i] = -result[i];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1452	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1453	store_vector4(inst, machine, result);
				1454	}
				1455	break;
				1456	case OPCODE_TEX: /* Both ARB and NV frag prog */
				1457	/* Texel lookup */
				1458	{
				1459	/* Note: only use the precomputed lambda value when we're
				1460	* sampling texture unit [K] with texcoord[K].
				1461	* Otherwise, the lambda value may have no relation to the
				1462	* instruction's texcoord or texture image. Using the wrong
				1463	* lambda is usually bad news.
				1464	* The rest of the time, just use zero (until we get a more
				1465	* sophisticated way of computing lambda).
				1466	*/
				1467	GLfloat coord[4], color[4], lambda;
				1468	#if 0
				1469	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1470	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1471	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1472	else
				1473	#endif
				1474	lambda = 0.0;
				1475	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
				1476	machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
				1477	color);
				1478	if (DEBUG_PROG) {
				1479	printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
				1480	"lod %f\n",
				1481	color[0], color[1], color[2], color[3],
				1482	inst->TexSrcUnit,
				1483	coord[0], coord[1], coord[2], coord[3], lambda);
				1484	}
				1485	store_vector4(inst, machine, color);
				1486	}
				1487	break;
				1488	case OPCODE_TXB: /* GL_ARB_fragment_program only */
				1489	/* Texel lookup with LOD bias */
				1490	{
				1491	const struct gl_texture_unit *texUnit
				1492	= &ctx->Texture.Unit[inst->TexSrcUnit];
				1493	GLfloat coord[4], color[4], lambda, bias;
				1494	#if 0
				1495	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1496	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1497	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1498	else
				1499	#endif
				1500	lambda = 0.0;
				1501	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
				1502	/* coord[3] is the bias to add to lambda */
				1503	bias = texUnit->LodBias + coord[3];
				1504	if (texUnit->_Current)
				1505	bias += texUnit->_Current->LodBias;
				1506	machine->FetchTexelLod(ctx, coord, lambda + bias,
				1507	inst->TexSrcUnit, color);
				1508	store_vector4(inst, machine, color);
				1509	}
				1510	break;
				1511	case OPCODE_TXD: /* GL_NV_fragment_program only */
				1512	/* Texture lookup w/ partial derivatives for LOD */
				1513	{
				1514	GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
				1515	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1516	fetch_vector4(ctx, &inst->SrcReg[1], machine, dtdx);
				1517	fetch_vector4(ctx, &inst->SrcReg[2], machine, dtdy);
				1518	machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
				1519	inst->TexSrcUnit, color);
				1520	store_vector4(inst, machine, color);
				1521	}
				1522	break;
				1523	case OPCODE_TXP: /* GL_ARB_fragment_program only */
				1524	/* Texture lookup w/ projective divide */
				1525	{
				1526	GLfloat texcoord[4], color[4], lambda;
				1527	#if 0
				1528	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1529	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1530	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1531	else
				1532	#endif
				1533	lambda = 0.0;
				1534	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1535	/* Not so sure about this test - if texcoord[3] is
				1536	* zero, we'd probably be fine except for an ASSERT in
				1537	* IROUND_POS() which gets triggered by the inf values created.
				1538	*/
				1539	if (texcoord[3] != 0.0) {
				1540	texcoord[0] /= texcoord[3];
				1541	texcoord[1] /= texcoord[3];
				1542	texcoord[2] /= texcoord[3];
				1543	}
				1544	machine->FetchTexelLod(ctx, texcoord, lambda,
				1545	inst->TexSrcUnit, color);
				1546	store_vector4(inst, machine, color);
				1547	}
				1548	break;
				1549	case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
				1550	/* Texture lookup w/ projective divide */
				1551	{
				1552	GLfloat texcoord[4], color[4], lambda;
				1553	#if 0
				1554	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1555	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1556	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1557	else
				1558	#endif
				1559	lambda = 0.0;
				1560	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1561	if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
				1562	texcoord[3] != 0.0) {
				1563	texcoord[0] /= texcoord[3];
				1564	texcoord[1] /= texcoord[3];
				1565	texcoord[2] /= texcoord[3];
				1566	}
				1567	machine->FetchTexelLod(ctx, texcoord, lambda,
				1568	inst->TexSrcUnit, color);
				1569	store_vector4(inst, machine, color);
				1570	}
				1571	break;
				1572	case OPCODE_UP2H: /* unpack two 16-bit floats */
				1573	{
				1574	GLfloat a[4], result[4];
				1575	const GLuint rawBits = (const GLuint ) a;
				1576	GLhalfNV hx, hy;
				1577	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1578	hx = rawBits[0] & 0xffff;
				1579	hy = rawBits[0] >> 16;
				1580	result[0] = result[2] = _mesa_half_to_float(hx);
				1581	result[1] = result[3] = _mesa_half_to_float(hy);
				1582	store_vector4(inst, machine, result);
				1583	}
				1584	break;
				1585	case OPCODE_UP2US: /* unpack two GLushorts */
				1586	{
				1587	GLfloat a[4], result[4];
				1588	const GLuint rawBits = (const GLuint ) a;
				1589	GLushort usx, usy;
				1590	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1591	usx = rawBits[0] & 0xffff;
				1592	usy = rawBits[0] >> 16;
				1593	result[0] = result[2] = usx * (1.0f / 65535.0f);
				1594	result[1] = result[3] = usy * (1.0f / 65535.0f);
				1595	store_vector4(inst, machine, result);
				1596	}
				1597	break;
				1598	case OPCODE_UP4B: /* unpack four GLbytes */
				1599	{
				1600	GLfloat a[4], result[4];
				1601	const GLuint rawBits = (const GLuint ) a;
				1602	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1603	result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
				1604	result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
				1605	result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
				1606	result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
				1607	store_vector4(inst, machine, result);
				1608	}
				1609	break;
				1610	case OPCODE_UP4UB: /* unpack four GLubytes */
				1611	{
				1612	GLfloat a[4], result[4];
				1613	const GLuint rawBits = (const GLuint ) a;
				1614	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1615	result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
				1616	result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
				1617	result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
				1618	result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
				1619	store_vector4(inst, machine, result);
				1620	}
				1621	break;
				1622	case OPCODE_XPD: /* cross product */
				1623	{
				1624	GLfloat a[4], b[4], result[4];
				1625	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1626	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1627	result[0] = a[1] * b[2] - a[2] * b[1];
				1628	result[1] = a[2] * b[0] - a[0] * b[2];
				1629	result[2] = a[0] * b[1] - a[1] * b[0];
				1630	result[3] = 1.0;
				1631	store_vector4(inst, machine, result);
				1632	}
				1633	break;
				1634	case OPCODE_X2D: /* 2-D matrix transform */
				1635	{
				1636	GLfloat a[4], b[4], c[4], result[4];
				1637	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1638	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1639	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1640	result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
				1641	result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
				1642	result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
				1643	result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
				1644	store_vector4(inst, machine, result);
				1645	}
				1646	break;
				1647	case OPCODE_PRINT:
				1648	{
				1649	if (inst->SrcReg[0].File != -1) {
				1650	GLfloat a[4];
				1651	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1652	_mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
				1653	a[0], a[1], a[2], a[3]);
				1654	}
				1655	else {
				1656	_mesa_printf("%s\n", (const char *) inst->Data);
				1657	}
				1658	}
				1659	break;
				1660	case OPCODE_END:
				1661	return GL_TRUE;
				1662	default:
				1663	_mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
				1664	inst->Opcode);
				1665	return GL_TRUE; /* return value doesn't matter */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1666
				1667	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1668
Brian	cfd0011	2007-02-25 18:30:45 -0700	[diff] [blame]	1669	numExec++;
				1670	if (numExec > maxExec) {
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1671	_mesa_problem(ctx, "Infinite loop detected in fragment program");
				1672	return GL_TRUE;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1673	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1674
				1675	} /* for pc */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1676
				1677	#if FEATURE_MESA_program_debug
				1678	CurrentMachine = NULL;
				1679	#endif
				1680
				1681	return GL_TRUE;
				1682	}