Blame - src/mesa/shader/prog_execute.c - platform/external/mesa3d

blob: 6417a88e900557d6caeb8252efa76aba26aea876 [file] [log] [blame]

Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	* Version: 6.5.3
				4	*
				5	* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a
				8	* copy of this software and associated documentation files (the "Software"),
				9	* to deal in the Software without restriction, including without limitation
				10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				11	* and/or sell copies of the Software, and to permit persons to whom the
				12	* Software is furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice shall be included
				15	* in all copies or substantial portions of the Software.
				16	*
				17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
				18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				20	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
				21	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				22	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				23	*/
				24
				25	/**
				26	* \file prog_execute.c
				27	* Software interpreter for vertex/fragment programs.
				28	* \author Brian Paul
				29	*/
				30
				31	/*
				32	* NOTE: we do everything in single-precision floating point; we don't
				33	* currently observe the single/half/fixed-precision qualifiers.
				34	*
				35	*/
				36
				37
				38	#include "glheader.h"
				39	#include "colormac.h"
				40	#include "context.h"
				41	#include "program.h"
				42	#include "prog_execute.h"
				43	#include "prog_instruction.h"
				44	#include "prog_parameter.h"
				45	#include "prog_print.h"
				46	#include "slang_library_noise.h"
				47
				48
				49	/* See comments below for info about this */
				50	#define LAMBDA_ZERO 1
				51
				52	/* debug predicate */
				53	#define DEBUG_PROG 0
				54
				55
/**
 * Set x to positive or negative infinity.
 *
 * x must be a float lvalue.  On IEEE platforms the single-precision bit
 * pattern for +/-infinity is stored directly through an integer pointer;
 * elsewhere the largest value the platform offers is assigned instead.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Overwrite the raw bits of the float lvalue x with the integer 'bits',
 * going through the integer member of the fi_type float/int union.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
				71
				72
				73	static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
				74
				75
				76
#if FEATURE_MESA_program_debug
/* Machine whose registers the debug query below inspects.  It is assigned
 * by _mesa_execute_program() when a program starts running. */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (used only for error reporting)
 * \param file   register file to read (input, output or temporary)
 * \param index  register index within that file; not range-checked here
 * \param val    receives the four register components
 *
 * val is left untouched when no machine has been recorded yet or when
 * 'file' is not one of the supported register files.
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (CurrentMachine) {
      switch (file) {
      case PROGRAM_INPUT:
         /* vertex programs read per-vertex attributes, fragment programs
          * read the attributes of the element currently being processed */
         if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
            COPY_4V(val, CurrentMachine->VertAttribs[index]);
         }
         else {
            COPY_4V(val,
                    CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
         }
         break;
      case PROGRAM_OUTPUT:
         COPY_4V(val, CurrentMachine->Outputs[index]);
         break;
      case PROGRAM_TEMPORARY:
         COPY_4V(val, CurrentMachine->Temporaries[index]);
         break;
      default:
         /* report against the caller's context and under this function's
          * real name so the message can be traced */
         _mesa_problem(ctx,
                       "bad register file in _mesa_get_program_register");
      }
   }
}
#endif /* FEATURE_MESA_program_debug */
				113
				114
				115
				116	/**
				117	* Return a pointer to the 4-element float vector specified by the given
				118	* source register.
				119	*/
				120	static INLINE const GLfloat *
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	121	get_register_pointer(GLcontext * ctx,
				122	const struct prog_src_register *source,
				123	const struct gl_program_machine *machine)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	124	{
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	125	if (source->RelAddr) {
				126	const GLint reg = source->Index + machine->AddressReg[0][0];
				127	ASSERT( (source->File == PROGRAM_ENV_PARAM) \|\|
				128	(source->File == PROGRAM_STATE_VAR) );
				129	if (reg < 0 \|\| reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
				130	return ZeroVec;
				131	else if (source->File == PROGRAM_ENV_PARAM)
				132	return ctx->VertexProgram.Parameters[reg];
				133	else {
Brian	761728a	2007-02-24 11:14:57 -0700	[diff] [blame]	134	ASSERT(source->File == PROGRAM_LOCAL_PARAM \|\|
				135	source->File == PROGRAM_STATE_VAR);
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	136	return machine->CurProgram->Parameters->ParameterValues[reg];
				137	}
				138	}
				139
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	140	switch (source->File) {
				141	case PROGRAM_TEMPORARY:
				142	ASSERT(source->Index < MAX_PROGRAM_TEMPS);
				143	return machine->Temporaries[source->Index];
				144
				145	case PROGRAM_INPUT:
				146	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
				147	ASSERT(source->Index < VERT_ATTRIB_MAX);
				148	return machine->VertAttribs[source->Index];
				149	}
				150	else {
				151	ASSERT(source->Index < FRAG_ATTRIB_MAX);
				152	return machine->Attribs[source->Index][machine->CurElement];
				153	}
				154
				155	case PROGRAM_OUTPUT:
				156	/* This is only for PRINT */
				157	ASSERT(source->Index < FRAG_RESULT_MAX);
				158	return machine->Outputs[source->Index];
				159
				160	case PROGRAM_LOCAL_PARAM:
				161	ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
				162	return machine->CurProgram->LocalParams[source->Index];
				163
				164	case PROGRAM_ENV_PARAM:
				165	ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
				166	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
				167	return ctx->VertexProgram.Parameters[source->Index];
				168	else
				169	return ctx->FragmentProgram.Parameters[source->Index];
				170
				171	case PROGRAM_STATE_VAR:
				172	/* Fallthrough */
				173	case PROGRAM_CONSTANT:
				174	/* Fallthrough */
				175	case PROGRAM_UNIFORM:
				176	/* Fallthrough */
				177	case PROGRAM_NAMED_PARAM:
				178	ASSERT(source->Index <
				179	(GLint) machine->CurProgram->Parameters->NumParameters);
				180	return machine->CurProgram->Parameters->ParameterValues[source->Index];
				181
				182	default:
				183	_mesa_problem(ctx,
				184	"Invalid input register file %d in get_register_pointer()",
				185	source->File);
				186	return NULL;
				187	}
				188	}
				189
				190
				191	/**
				192	* Fetch a 4-element float vector from the given source register.
				193	* Apply swizzling and negating as needed.
				194	*/
				195	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	196	fetch_vector4(GLcontext * ctx,
				197	const struct prog_src_register *source,
				198	const struct gl_program_machine *machine, GLfloat result[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	199	{
				200	const GLfloat *src = get_register_pointer(ctx, source, machine);
				201	ASSERT(src);
				202
				203	if (source->Swizzle == SWIZZLE_NOOP) {
				204	/* no swizzling */
				205	COPY_4V(result, src);
				206	}
				207	else {
				208	ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
				209	ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
				210	ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
				211	ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
				212	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				213	result[1] = src[GET_SWZ(source->Swizzle, 1)];
				214	result[2] = src[GET_SWZ(source->Swizzle, 2)];
				215	result[3] = src[GET_SWZ(source->Swizzle, 3)];
				216	}
				217
				218	if (source->NegateBase) {
				219	result[0] = -result[0];
				220	result[1] = -result[1];
				221	result[2] = -result[2];
				222	result[3] = -result[3];
				223	}
				224	if (source->Abs) {
				225	result[0] = FABSF(result[0]);
				226	result[1] = FABSF(result[1]);
				227	result[2] = FABSF(result[2]);
				228	result[3] = FABSF(result[3]);
				229	}
				230	if (source->NegateAbs) {
				231	result[0] = -result[0];
				232	result[1] = -result[1];
				233	result[2] = -result[2];
				234	result[3] = -result[3];
				235	}
				236	}
				237
				#if 0
/**
 * Compute the partial derivative of a fragment input register with respect
 * to window X or Y from the span's per-attribute step values, then apply
 * the operand's swizzle and negate/abs modifiers.
 *
 * \param xOrY    'X' for d/dx; any other value is treated as d/dy
 * \param column  pixel column within the span (used only for the X
 *                derivative of texcoords/varyings)
 * \param result  receives the four derivative components
 * \return GL_TRUE if it was easily computed or GL_FALSE if we
 *         need to execute another instance of the program (ugh)!
 *         This implementation always returns GL_TRUE.
 */
static GLboolean
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const SWspan * span,
                    char xOrY, GLint column, GLfloat result[4])
{
   const GLboolean inX = (xOrY == 'X');
   GLfloat deriv[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      /* window position moves by exactly one pixel along the chosen axis */
      deriv[0] = inX ? 1.0 : 0.0;
      deriv[1] = inX ? 0.0 : 1.0;
      if (inX) {
         deriv[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         deriv[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      /* color steps are scaled by 1/CHAN_MAXF */
      for (i = 0; i < 4; i++) {
         if (inX)
            deriv[i] = span->attrStepX[source->Index][i] * (1.0F / CHAN_MAXF);
         else
            deriv[i] = span->attrStepY[source->Index][i] * (1.0F / CHAN_MAXF);
      }
      break;
   case FRAG_ATTRIB_FOGC:
      /* only the first component carries a fog derivative */
      if (inX)
         deriv[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      else
         deriv[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      deriv[1] = 0.0;
      deriv[2] = 0.0;
      deriv[3] = 0.0;
      break;
   default:
      assert(source->Index < FRAG_ATTRIB_MAX);
      /* texcoord or varying */
      if (inX) {
         /* divide the step by the fourth component evaluated at 'column' */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepX[source->Index][3]
                                      * column);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepX[source->Index][i] * invQ;
      }
      else {
         /* same idea in Y; note that 'column' is not involved here */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepY[source->Index][3]);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepY[source->Index][i] * invQ;
      }
      break;
   }

   /* swizzle, then negate / abs / negate-abs per component */
   for (i = 0; i < 4; i++) {
      GLfloat v = deriv[GET_SWZ(source->Swizzle, i)];
      if (source->NegateBase)
         v = -v;
      if (source->Abs)
         v = FABSF(v);
      if (source->NegateAbs)
         v = -v;
      result[i] = v;
   }

   return GL_TRUE;
}
#endif
				352
				353
				354	/**
				355	* As above, but only return result[0] element.
				356	*/
				357	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	358	fetch_vector1(GLcontext * ctx,
				359	const struct prog_src_register *source,
				360	const struct gl_program_machine *machine, GLfloat result[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	361	{
				362	const GLfloat *src = get_register_pointer(ctx, source, machine);
				363	ASSERT(src);
				364
				365	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				366
				367	if (source->NegateBase) {
				368	result[0] = -result[0];
				369	}
				370	if (source->Abs) {
				371	result[0] = FABSF(result[0]);
				372	}
				373	if (source->NegateAbs) {
				374	result[0] = -result[0];
				375	}
				376	}
				377
				378
				379	/**
				380	* Test value against zero and return GT, LT, EQ or UN if NaN.
				381	*/
				382	static INLINE GLuint
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	383	generate_cc(float value)
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	384	{
				385	if (value != value)
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	386	return COND_UN; /* NaN */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	387	if (value > 0.0F)
				388	return COND_GT;
				389	if (value < 0.0F)
				390	return COND_LT;
				391	return COND_EQ;
				392	}
				393
				394
				395	/**
				396	* Test if the ccMaskRule is satisfied by the given condition code.
				397	* Used to mask destination writes according to the current condition code.
				398	*/
				399	static INLINE GLboolean
				400	test_cc(GLuint condCode, GLuint ccMaskRule)
				401	{
				402	switch (ccMaskRule) {
				403	case COND_EQ: return (condCode == COND_EQ);
				404	case COND_NE: return (condCode != COND_EQ);
				405	case COND_LT: return (condCode == COND_LT);
				406	case COND_GE: return (condCode == COND_GT \|\| condCode == COND_EQ);
				407	case COND_LE: return (condCode == COND_LT \|\| condCode == COND_EQ);
				408	case COND_GT: return (condCode == COND_GT);
				409	case COND_TR: return GL_TRUE;
				410	case COND_FL: return GL_FALSE;
				411	default: return GL_TRUE;
				412	}
				413	}
				414
				415
				416	/**
				417	* Evaluate the 4 condition codes against a predicate and return GL_TRUE
				418	* or GL_FALSE to indicate result.
				419	*/
				420	static INLINE GLboolean
				421	eval_condition(const struct gl_program_machine *machine,
				422	const struct prog_instruction *inst)
				423	{
				424	const GLuint swizzle = inst->DstReg.CondSwizzle;
				425	const GLuint condMask = inst->DstReg.CondMask;
				426	if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) \|\|
				427	test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) \|\|
				428	test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) \|\|
				429	test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
				430	return GL_TRUE;
				431	}
				432	else {
				433	return GL_FALSE;
				434	}
				435	}
				436
				437
				438
				439	/**
				440	* Store 4 floats into a register. Observe the instructions saturate and
				441	* set-condition-code flags.
				442	*/
				443	static void
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	444	store_vector4(const struct prog_instruction *inst,
				445	struct gl_program_machine *machine, const GLfloat value[4])
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	446	{
				447	const struct prog_dst_register *dest = &(inst->DstReg);
				448	const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
				449	GLfloat *dstReg;
				450	GLfloat dummyReg[4];
				451	GLfloat clampedValue[4];
				452	GLuint writeMask = dest->WriteMask;
				453
				454	switch (dest->File) {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	455	case PROGRAM_OUTPUT:
				456	dstReg = machine->Outputs[dest->Index];
				457	break;
				458	case PROGRAM_TEMPORARY:
				459	dstReg = machine->Temporaries[dest->Index];
				460	break;
				461	case PROGRAM_WRITE_ONLY:
				462	dstReg = dummyReg;
				463	return;
				464	default:
				465	_mesa_problem(NULL, "bad register file in store_vector4(fp)");
				466	return;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	467	}
				468
				469	#if 0
				470	if (value[0] > 1.0e10 \|\|
				471	IS_INF_OR_NAN(value[0]) \|\|
				472	IS_INF_OR_NAN(value[1]) \|\|
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	473	IS_INF_OR_NAN(value[2]) \|\| IS_INF_OR_NAN(value[3]))
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	474	printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
				475	#endif
				476
				477	if (clamp) {
				478	clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
				479	clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
				480	clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
				481	clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
				482	value = clampedValue;
				483	}
				484
				485	if (dest->CondMask != COND_TR) {
				486	/* condition codes may turn off some writes */
				487	if (writeMask & WRITEMASK_X) {
				488	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
				489	dest->CondMask))
				490	writeMask &= ~WRITEMASK_X;
				491	}
				492	if (writeMask & WRITEMASK_Y) {
				493	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
				494	dest->CondMask))
				495	writeMask &= ~WRITEMASK_Y;
				496	}
				497	if (writeMask & WRITEMASK_Z) {
				498	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
				499	dest->CondMask))
				500	writeMask &= ~WRITEMASK_Z;
				501	}
				502	if (writeMask & WRITEMASK_W) {
				503	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
				504	dest->CondMask))
				505	writeMask &= ~WRITEMASK_W;
				506	}
				507	}
				508
				509	if (writeMask & WRITEMASK_X)
				510	dstReg[0] = value[0];
				511	if (writeMask & WRITEMASK_Y)
				512	dstReg[1] = value[1];
				513	if (writeMask & WRITEMASK_Z)
				514	dstReg[2] = value[2];
				515	if (writeMask & WRITEMASK_W)
				516	dstReg[3] = value[3];
				517
				518	if (inst->CondUpdate) {
				519	if (writeMask & WRITEMASK_X)
				520	machine->CondCodes[0] = generate_cc(value[0]);
				521	if (writeMask & WRITEMASK_Y)
				522	machine->CondCodes[1] = generate_cc(value[1]);
				523	if (writeMask & WRITEMASK_Z)
				524	machine->CondCodes[2] = generate_cc(value[2]);
				525	if (writeMask & WRITEMASK_W)
				526	machine->CondCodes[3] = generate_cc(value[3]);
				527	}
				528	}
				529
				530
				#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * \param ctx       rendering context
 * \param machine   machine to copy from; it is only read
 * \param program   fragment program (target and InputsRead are consulted)
 * \param span      span supplying the per-attribute X/Y step values
 * \param xOrY      'X' to add d/dx steps; any other value adds d/dy steps
 * \param dMachine  receives the copied, perturbed machine state
 *
 * All modifications are made to dMachine; previously the temporaries were
 * cleared and the inputs perturbed on the const source machine instead of
 * on the copy.
 * NOTE(review): if gl_program_machine::Attribs is a pointer to external
 * storage rather than an embedded array, the copy still shares that storage
 * with 'machine' - confirm against the struct declaration.
 */
static void
init_machine_deriv(GLcontext * ctx,
                   const struct gl_program_machine *machine,
                   const struct gl_fragment_program *program,
                   const SWspan * span, char xOrY,
                   struct gl_program_machine *dMachine)
{
   GLuint attr;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero((void *) dMachine->Temporaries,
                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      if (xOrY == 'X') {
         wpos[0] += 1.0F;
         wpos[1] += 0.0F;
         wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         wpos[0] += 0.0F;
         wpos[1] += 1.0F;
         wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         if (xOrY == 'X') {
            col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
         }
         else {
            col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
         }
      }
   }
   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      if (xOrY == 'X') {
         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
      }
      else {
         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
      }
   }
   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         /* XXX perspective-correct interpolation */
         if (xOrY == 'X') {
            val[0] += span->attrStepX[attr][0];
            val[1] += span->attrStepX[attr][1];
            val[2] += span->attrStepX[attr][2];
            val[3] += span->attrStepX[attr][3];
         }
         else {
            val[0] += span->attrStepY[attr][0];
            val[1] += span->attrStepY[attr][1];
            val[2] += span->attrStepY[attr][2];
            val[3] += span->attrStepY[attr][3];
         }
      }
   }

   /* init condition codes */
   dMachine->CondCodes[0] = COND_EQ;
   dMachine->CondCodes[1] = COND_EQ;
   dMachine->CondCodes[2] = COND_EQ;
   dMachine->CondCodes[3] = COND_EQ;
}
#endif
				629
				630
				/**
 * Execute the given vertex/fragment program.
 *
 * \param ctx - rendering context
 * \param program - the vertex or fragment program to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \param element - NOTE(review): undocumented in the original; presumably
 *                  the index of the element being processed - confirm
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
				640	GLboolean
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	641	_mesa_execute_program(GLcontext * ctx,
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	642	const struct gl_program *program, GLuint maxInst,
				643	struct gl_program_machine *machine, GLuint element)
				644	{
				645	const GLuint MAX_EXEC = 10000;
				646	GLint pc, total = 0;
				647
				648	machine->CurProgram = program;
				649
				650	if (DEBUG_PROG) {
				651	printf("execute program %u --------------------\n", program->Id);
				652	}
				653
				654	#if FEATURE_MESA_program_debug
				655	CurrentMachine = machine;
				656	#endif
				657
				658	for (pc = 0; pc < maxInst; pc++) {
				659	const struct prog_instruction *inst = program->Instructions + pc;
				660
				661	#if FEATURE_MESA_program_debug
				662	if (ctx->FragmentProgram.CallbackEnabled &&
				663	ctx->FragmentProgram.Callback) {
				664	ctx->FragmentProgram.CurrentPosition = inst->StringPos;
				665	ctx->FragmentProgram.Callback(program->Target,
				666	ctx->FragmentProgram.CallbackData);
				667	}
				668	#endif
				669
				670	if (DEBUG_PROG) {
				671	_mesa_print_instruction(inst);
				672	}
				673
				674	switch (inst->Opcode) {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	675	case OPCODE_ABS:
				676	{
				677	GLfloat a[4], result[4];
				678	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				679	result[0] = FABSF(a[0]);
				680	result[1] = FABSF(a[1]);
				681	result[2] = FABSF(a[2]);
				682	result[3] = FABSF(a[3]);
				683	store_vector4(inst, machine, result);
				684	}
				685	break;
				686	case OPCODE_ADD:
				687	{
				688	GLfloat a[4], b[4], result[4];
				689	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				690	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				691	result[0] = a[0] + b[0];
				692	result[1] = a[1] + b[1];
				693	result[2] = a[2] + b[2];
				694	result[3] = a[3] + b[3];
				695	store_vector4(inst, machine, result);
				696	if (DEBUG_PROG) {
				697	printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
				698	result[0], result[1], result[2], result[3],
				699	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	700	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	701	}
				702	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	703	case OPCODE_ARL:
				704	{
				705	GLfloat t[4];
				706	fetch_vector4(ctx, &inst->SrcReg[0], machine, t);
				707	machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
				708	}
				709	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	710	case OPCODE_BGNLOOP:
				711	/* no-op */
				712	break;
				713	case OPCODE_ENDLOOP:
				714	/* subtract 1 here since pc is incremented by for(pc) loop */
				715	pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
				716	break;
				717	case OPCODE_BGNSUB: /* begin subroutine */
				718	break;
				719	case OPCODE_ENDSUB: /* end subroutine */
				720	break;
				721	case OPCODE_BRA: /* branch (conditional) */
				722	/* fall-through */
				723	case OPCODE_BRK: /* break out of loop (conditional) */
				724	/* fall-through */
				725	case OPCODE_CONT: /* continue loop (conditional) */
				726	if (eval_condition(machine, inst)) {
				727	/* take branch */
				728	/* Subtract 1 here since we'll do pc++ at end of for-loop */
				729	pc = inst->BranchTarget - 1;
				730	}
				731	break;
				732	case OPCODE_CAL: /* Call subroutine (conditional) */
				733	if (eval_condition(machine, inst)) {
				734	/* call the subroutine */
				735	if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
				736	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	737	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	738	machine->CallStack[machine->StackDepth++] = pc + 1;
				739	pc = inst->BranchTarget; /* XXX - 1 ??? */
				740	}
				741	break;
				742	case OPCODE_CMP:
				743	{
				744	GLfloat a[4], b[4], c[4], result[4];
				745	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				746	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				747	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				748	result[0] = a[0] < 0.0F ? b[0] : c[0];
				749	result[1] = a[1] < 0.0F ? b[1] : c[1];
				750	result[2] = a[2] < 0.0F ? b[2] : c[2];
				751	result[3] = a[3] < 0.0F ? b[3] : c[3];
				752	store_vector4(inst, machine, result);
				753	}
				754	break;
				755	case OPCODE_COS:
				756	{
				757	GLfloat a[4], result[4];
				758	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				759	result[0] = result[1] = result[2] = result[3]
				760	= (GLfloat) _mesa_cos(a[0]);
				761	store_vector4(inst, machine, result);
				762	}
				763	break;
				764	case OPCODE_DDX: /* Partial derivative with respect to X */
				765	{
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	766	#if 0
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	767	GLfloat a[4], aNext[4], result[4];
				768	struct gl_program_machine dMachine;
				769	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
				770	column, result)) {
				771	/* This is tricky. Make a copy of the current machine state,
				772	* increment the input registers by the dx or dy partial
				773	* derivatives, then re-execute the program up to the
				774	* preceeding instruction, then fetch the source register.
				775	* Finally, find the difference in the register values for
				776	* the original and derivative runs.
				777	*/
				778	fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
				779	init_machine_deriv(ctx, machine, program, span,
				780	'X', &dMachine);
				781	execute_program(ctx, program, pc, &dMachine, span, column);
				782	fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
				783	aNext);
				784	result[0] = aNext[0] - a[0];
				785	result[1] = aNext[1] - a[1];
				786	result[2] = aNext[2] - a[2];
				787	result[3] = aNext[3] - a[3];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	788	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	789	store_vector4(inst, machine, result);
				790	#else
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	791	store_vector4(inst, machine, ZeroVec);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	792	#endif
				793	}
				794	break;
				795	case OPCODE_DDY: /* Partial derivative with respect to Y */
				796	{
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	797	#if 0
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	798	GLfloat a[4], aNext[4], result[4];
				799	struct gl_program_machine dMachine;
				800	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
				801	column, result)) {
				802	init_machine_deriv(ctx, machine, program, span,
				803	'Y', &dMachine);
				804	fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
				805	execute_program(ctx, program, pc, &dMachine, span, column);
				806	fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
				807	aNext);
				808	result[0] = aNext[0] - a[0];
				809	result[1] = aNext[1] - a[1];
				810	result[2] = aNext[2] - a[2];
				811	result[3] = aNext[3] - a[3];
				812	}
				813	store_vector4(inst, machine, result);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	814	#else
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	815	store_vector4(inst, machine, ZeroVec);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	816	#endif
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	817	}
				818	break;
				819	case OPCODE_DP3:
				820	{
				821	GLfloat a[4], b[4], result[4];
				822	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				823	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				824	result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
				825	store_vector4(inst, machine, result);
				826	if (DEBUG_PROG) {
				827	printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
				828	result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	829	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	830	}
				831	break;
				832	case OPCODE_DP4:
				833	{
				834	GLfloat a[4], b[4], result[4];
				835	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				836	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				837	result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
				838	store_vector4(inst, machine, result);
				839	if (DEBUG_PROG) {
				840	printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
				841	result[0], a[0], a[1], a[2], a[3],
				842	b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	843	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	844	}
				845	break;
				846	case OPCODE_DPH:
				847	{
				848	GLfloat a[4], b[4], result[4];
				849	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				850	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				851	result[0] = result[1] = result[2] = result[3] =
				852	a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
				853	store_vector4(inst, machine, result);
				854	}
				855	break;
				856	case OPCODE_DST: /* Distance vector */
				857	{
				858	GLfloat a[4], b[4], result[4];
				859	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				860	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				861	result[0] = 1.0F;
				862	result[1] = a[1] * b[1];
				863	result[2] = a[2];
				864	result[3] = b[3];
				865	store_vector4(inst, machine, result);
				866	}
				867	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	868	case OPCODE_EXP:
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	869	{
				870	GLfloat t[4], q[4], floor_t0;
				871	fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
				872	floor_t0 = FLOORF(t[0]);
				873	if (floor_t0 > FLT_MAX_EXP) {
				874	SET_POS_INFINITY(q[0]);
				875	SET_POS_INFINITY(q[2]);
				876	}
				877	else if (floor_t0 < FLT_MIN_EXP) {
				878	q[0] = 0.0F;
				879	q[2] = 0.0F;
				880	}
				881	else {
Brian	761728a	2007-02-24 11:14:57 -0700	[diff] [blame]	882	q[0] = LDEXPF(1.0, (int) floor_t0);
				883	/* Note: GL_NV_vertex_program expects
				884	* result.z = result.x * APPX(result.y)
				885	* We do what the ARB extension says.
				886	*/
				887	q[2] = pow(2.0, t[0]);
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	888	}
				889	q[1] = t[0] - floor_t0;
				890	q[3] = 1.0F;
				891	store_vector4( inst, machine, q );
				892	}
				893	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	894	case OPCODE_EX2: /* Exponential base 2 */
				895	{
				896	GLfloat a[4], result[4];
				897	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				898	result[0] = result[1] = result[2] = result[3] =
				899	(GLfloat) _mesa_pow(2.0, a[0]);
				900	store_vector4(inst, machine, result);
				901	}
				902	break;
				903	case OPCODE_FLR:
				904	{
				905	GLfloat a[4], result[4];
				906	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				907	result[0] = FLOORF(a[0]);
				908	result[1] = FLOORF(a[1]);
				909	result[2] = FLOORF(a[2]);
				910	result[3] = FLOORF(a[3]);
				911	store_vector4(inst, machine, result);
				912	}
				913	break;
				914	case OPCODE_FRC:
				915	{
				916	GLfloat a[4], result[4];
				917	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				918	result[0] = a[0] - FLOORF(a[0]);
				919	result[1] = a[1] - FLOORF(a[1]);
				920	result[2] = a[2] - FLOORF(a[2]);
				921	result[3] = a[3] - FLOORF(a[3]);
				922	store_vector4(inst, machine, result);
				923	}
				924	break;
				925	case OPCODE_IF:
				926	if (eval_condition(machine, inst)) {
				927	/* do if-clause (just continue execution) */
				928	}
				929	else {
				930	/* go to the instruction after ELSE or ENDIF */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	931	assert(inst->BranchTarget >= 0);
				932	pc = inst->BranchTarget - 1;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	933	}
				934	break;
				935	case OPCODE_ELSE:
				936	/* goto ENDIF */
				937	assert(inst->BranchTarget >= 0);
				938	pc = inst->BranchTarget - 1;
				939	break;
				940	case OPCODE_ENDIF:
				941	/* nothing */
				942	break;
				943	case OPCODE_INT: /* float to int */
				944	{
				945	GLfloat a[4], result[4];
				946	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				947	result[0] = (GLfloat) (GLint) a[0];
				948	result[1] = (GLfloat) (GLint) a[1];
				949	result[2] = (GLfloat) (GLint) a[2];
				950	result[3] = (GLfloat) (GLint) a[3];
				951	store_vector4(inst, machine, result);
				952	}
				953	break;
				954	case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
				955	if (eval_condition(machine, inst)) {
				956	return GL_FALSE;
				957	}
				958	break;
				959	case OPCODE_KIL: /* ARB_f_p only */
				960	{
				961	GLfloat a[4];
				962	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				963	if (a[0] < 0.0F \|\| a[1] < 0.0F \|\| a[2] < 0.0F \|\| a[3] < 0.0F) {
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	964	return GL_FALSE;
				965	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	966	}
				967	break;
				968	case OPCODE_LG2: /* log base 2 */
				969	{
				970	GLfloat a[4], result[4];
				971	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				972	result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
				973	store_vector4(inst, machine, result);
				974	}
				975	break;
				976	case OPCODE_LIT:
				977	{
				978	const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
				979	GLfloat a[4], result[4];
				980	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				981	a[0] = MAX2(a[0], 0.0F);
				982	a[1] = MAX2(a[1], 0.0F);
				983	/* XXX ARB version clamps a[3], NV version doesn't */
				984	a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
				985	result[0] = 1.0F;
				986	result[1] = a[0];
				987	/* XXX we could probably just use pow() here */
				988	if (a[0] > 0.0F) {
				989	if (a[1] == 0.0 && a[3] == 0.0)
				990	result[2] = 1.0;
				991	else
				992	result[2] = EXPF(a[3] * LOGF(a[1]));
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	993	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	994	else {
				995	result[2] = 0.0;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	996	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	997	result[3] = 1.0F;
				998	store_vector4(inst, machine, result);
				999	if (DEBUG_PROG) {
				1000	printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
				1001	result[0], result[1], result[2], result[3],
				1002	a[0], a[1], a[2], a[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1003	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1004	}
				1005	break;
Brian	f183a2d	2007-02-23 17:14:30 -0700	[diff] [blame]	1006	case OPCODE_LOG:
				1007	{
				1008	GLfloat t[4], q[4], abs_t0;
				1009	fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
				1010	abs_t0 = FABSF(t[0]);
				1011	if (abs_t0 != 0.0F) {
				1012	/* Since we really can't handle infinite values on VMS
				1013	* like other OSes we'll use __MAXFLOAT to represent
				1014	* infinity. This may need some tweaking.
				1015	*/
				1016	#ifdef VMS
				1017	if (abs_t0 == __MAXFLOAT)
				1018	#else
				1019	if (IS_INF_OR_NAN(abs_t0))
				1020	#endif
				1021	{
				1022	SET_POS_INFINITY(q[0]);
				1023	q[1] = 1.0F;
				1024	SET_POS_INFINITY(q[2]);
				1025	}
				1026	else {
				1027	int exponent;
				1028	GLfloat mantissa = FREXPF(t[0], &exponent);
				1029	q[0] = (GLfloat) (exponent - 1);
				1030	q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
				1031	q[2] = (GLfloat) (q[0] + LOG2(q[1]));
				1032	}
				1033	}
				1034	else {
				1035	SET_NEG_INFINITY(q[0]);
				1036	q[1] = 1.0F;
				1037	SET_NEG_INFINITY(q[2]);
				1038	}
				1039	q[3] = 1.0;
				1040	store_vector4(inst, machine, q);
				1041	}
				1042	break;
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1043	case OPCODE_LRP:
				1044	{
				1045	GLfloat a[4], b[4], c[4], result[4];
				1046	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1047	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1048	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1049	result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
				1050	result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
				1051	result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
				1052	result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
				1053	store_vector4(inst, machine, result);
				1054	if (DEBUG_PROG) {
				1055	printf("LRP (%g %g %g %g) = (%g %g %g %g), "
				1056	"(%g %g %g %g), (%g %g %g %g)\n",
				1057	result[0], result[1], result[2], result[3],
				1058	a[0], a[1], a[2], a[3],
				1059	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1060	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1061	}
				1062	break;
				1063	case OPCODE_MAD:
				1064	{
				1065	GLfloat a[4], b[4], c[4], result[4];
				1066	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1067	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1068	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1069	result[0] = a[0] * b[0] + c[0];
				1070	result[1] = a[1] * b[1] + c[1];
				1071	result[2] = a[2] * b[2] + c[2];
				1072	result[3] = a[3] * b[3] + c[3];
				1073	store_vector4(inst, machine, result);
				1074	if (DEBUG_PROG) {
				1075	printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
				1076	"(%g %g %g %g) + (%g %g %g %g)\n",
				1077	result[0], result[1], result[2], result[3],
				1078	a[0], a[1], a[2], a[3],
				1079	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1080	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1081	}
				1082	break;
				1083	case OPCODE_MAX:
				1084	{
				1085	GLfloat a[4], b[4], result[4];
				1086	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1087	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1088	result[0] = MAX2(a[0], b[0]);
				1089	result[1] = MAX2(a[1], b[1]);
				1090	result[2] = MAX2(a[2], b[2]);
				1091	result[3] = MAX2(a[3], b[3]);
				1092	store_vector4(inst, machine, result);
				1093	if (DEBUG_PROG) {
				1094	printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
				1095	result[0], result[1], result[2], result[3],
				1096	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1097	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1098	}
				1099	break;
				1100	case OPCODE_MIN:
				1101	{
				1102	GLfloat a[4], b[4], result[4];
				1103	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1104	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1105	result[0] = MIN2(a[0], b[0]);
				1106	result[1] = MIN2(a[1], b[1]);
				1107	result[2] = MIN2(a[2], b[2]);
				1108	result[3] = MIN2(a[3], b[3]);
				1109	store_vector4(inst, machine, result);
				1110	}
				1111	break;
				1112	case OPCODE_MOV:
				1113	{
				1114	GLfloat result[4];
				1115	fetch_vector4(ctx, &inst->SrcReg[0], machine, result);
				1116	store_vector4(inst, machine, result);
				1117	if (DEBUG_PROG) {
				1118	printf("MOV (%g %g %g %g)\n",
				1119	result[0], result[1], result[2], result[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1120	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1121	}
				1122	break;
				1123	case OPCODE_MUL:
				1124	{
				1125	GLfloat a[4], b[4], result[4];
				1126	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1127	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1128	result[0] = a[0] * b[0];
				1129	result[1] = a[1] * b[1];
				1130	result[2] = a[2] * b[2];
				1131	result[3] = a[3] * b[3];
				1132	store_vector4(inst, machine, result);
				1133	if (DEBUG_PROG) {
				1134	printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
				1135	result[0], result[1], result[2], result[3],
				1136	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1137	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1138	}
				1139	break;
				1140	case OPCODE_NOISE1:
				1141	{
				1142	GLfloat a[4], result[4];
				1143	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1144	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1145	result[1] =
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1146	result[2] = result[3] = _slang_library_noise1(a[0]);
				1147	store_vector4(inst, machine, result);
				1148	}
				1149	break;
				1150	case OPCODE_NOISE2:
				1151	{
				1152	GLfloat a[4], result[4];
				1153	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1154	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1155	result[1] =
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1156	result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
				1157	store_vector4(inst, machine, result);
				1158	}
				1159	break;
				1160	case OPCODE_NOISE3:
				1161	{
				1162	GLfloat a[4], result[4];
				1163	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1164	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1165	result[1] =
				1166	result[2] =
				1167	result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1168	store_vector4(inst, machine, result);
				1169	}
				1170	break;
				1171	case OPCODE_NOISE4:
				1172	{
				1173	GLfloat a[4], result[4];
				1174	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1175	result[0] =
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1176	result[1] =
				1177	result[2] =
				1178	result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1179	store_vector4(inst, machine, result);
				1180	}
				1181	break;
				1182	case OPCODE_NOP:
				1183	break;
				1184	case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
				1185	{
				1186	GLfloat a[4], result[4];
				1187	GLhalfNV hx, hy;
				1188	GLuint rawResult = (GLuint ) result;
				1189	GLuint twoHalves;
				1190	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1191	hx = _mesa_float_to_half(a[0]);
				1192	hy = _mesa_float_to_half(a[1]);
				1193	twoHalves = hx \| (hy << 16);
				1194	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1195	= twoHalves;
				1196	store_vector4(inst, machine, result);
				1197	}
				1198	break;
				1199	case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
				1200	{
				1201	GLfloat a[4], result[4];
				1202	GLuint usx, usy, rawResult = (GLuint ) result;
				1203	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1204	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1205	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1206	usx = IROUND(a[0] * 65535.0F);
				1207	usy = IROUND(a[1] * 65535.0F);
				1208	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1209	= usx \| (usy << 16);
				1210	store_vector4(inst, machine, result);
				1211	}
				1212	break;
				1213	case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
				1214	{
				1215	GLfloat a[4], result[4];
				1216	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
				1217	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1218	a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
				1219	a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
				1220	a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
				1221	a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
				1222	ubx = IROUND(127.0F * a[0] + 128.0F);
				1223	uby = IROUND(127.0F * a[1] + 128.0F);
				1224	ubz = IROUND(127.0F * a[2] + 128.0F);
				1225	ubw = IROUND(127.0F * a[3] + 128.0F);
				1226	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1227	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1228	store_vector4(inst, machine, result);
				1229	}
				1230	break;
				1231	case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
				1232	{
				1233	GLfloat a[4], result[4];
				1234	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
				1235	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1236	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1237	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1238	a[2] = CLAMP(a[2], 0.0F, 1.0F);
				1239	a[3] = CLAMP(a[3], 0.0F, 1.0F);
				1240	ubx = IROUND(255.0F * a[0]);
				1241	uby = IROUND(255.0F * a[1]);
				1242	ubz = IROUND(255.0F * a[2]);
				1243	ubw = IROUND(255.0F * a[3]);
				1244	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1245	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1246	store_vector4(inst, machine, result);
				1247	}
				1248	break;
				1249	case OPCODE_POW:
				1250	{
				1251	GLfloat a[4], b[4], result[4];
				1252	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1253	fetch_vector1(ctx, &inst->SrcReg[1], machine, b);
				1254	result[0] = result[1] = result[2] = result[3]
				1255	= (GLfloat) _mesa_pow(a[0], b[0]);
				1256	store_vector4(inst, machine, result);
				1257	}
				1258	break;
				1259	case OPCODE_RCP:
				1260	{
				1261	GLfloat a[4], result[4];
				1262	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1263	if (DEBUG_PROG) {
				1264	if (a[0] == 0)
				1265	printf("RCP(0)\n");
				1266	else if (IS_INF_OR_NAN(a[0]))
				1267	printf("RCP(inf)\n");
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1268	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1269	result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
				1270	store_vector4(inst, machine, result);
				1271	}
				1272	break;
				1273	case OPCODE_RET: /* return from subroutine (conditional) */
				1274	if (eval_condition(machine, inst)) {
				1275	if (machine->StackDepth == 0) {
				1276	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1277	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1278	pc = machine->CallStack[--machine->StackDepth];
				1279	}
				1280	break;
				1281	case OPCODE_RFL: /* reflection vector */
				1282	{
				1283	GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
				1284	fetch_vector4(ctx, &inst->SrcReg[0], machine, axis);
				1285	fetch_vector4(ctx, &inst->SrcReg[1], machine, dir);
				1286	tmpW = DOT3(axis, axis);
				1287	tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
				1288	result[0] = tmpX * axis[0] - dir[0];
				1289	result[1] = tmpX * axis[1] - dir[1];
				1290	result[2] = tmpX * axis[2] - dir[2];
				1291	/* result[3] is never written! XXX enforce in parser! */
				1292	store_vector4(inst, machine, result);
				1293	}
				1294	break;
				1295	case OPCODE_RSQ: /* 1 / sqrt() */
				1296	{
				1297	GLfloat a[4], result[4];
				1298	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1299	a[0] = FABSF(a[0]);
				1300	result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
				1301	store_vector4(inst, machine, result);
				1302	if (DEBUG_PROG) {
				1303	printf("RSQ %g = 1/sqrt(\|%g\|)\n", result[0], a[0]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1304	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1305	}
				1306	break;
				1307	case OPCODE_SCS: /* sine and cos */
				1308	{
				1309	GLfloat a[4], result[4];
				1310	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1311	result[0] = (GLfloat) _mesa_cos(a[0]);
				1312	result[1] = (GLfloat) _mesa_sin(a[0]);
				1313	result[2] = 0.0; /* undefined! */
				1314	result[3] = 0.0; /* undefined! */
				1315	store_vector4(inst, machine, result);
				1316	}
				1317	break;
				1318	case OPCODE_SEQ: /* set on equal */
				1319	{
				1320	GLfloat a[4], b[4], result[4];
				1321	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1322	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1323	result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
				1324	result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
				1325	result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
				1326	result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
				1327	store_vector4(inst, machine, result);
				1328	}
				1329	break;
				1330	case OPCODE_SFL: /* set false, operands ignored */
				1331	{
				1332	static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
				1333	store_vector4(inst, machine, result);
				1334	}
				1335	break;
				1336	case OPCODE_SGE: /* set on greater or equal */
				1337	{
				1338	GLfloat a[4], b[4], result[4];
				1339	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1340	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1341	result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
				1342	result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
				1343	result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
				1344	result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
				1345	store_vector4(inst, machine, result);
				1346	}
				1347	break;
				1348	case OPCODE_SGT: /* set on greater */
				1349	{
				1350	GLfloat a[4], b[4], result[4];
				1351	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1352	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1353	result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
				1354	result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
				1355	result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
				1356	result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
				1357	store_vector4(inst, machine, result);
				1358	if (DEBUG_PROG) {
				1359	printf("SGT %g %g %g %g\n",
				1360	result[0], result[1], result[2], result[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1361	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1362	}
				1363	break;
				1364	case OPCODE_SIN:
				1365	{
				1366	GLfloat a[4], result[4];
				1367	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1368	result[0] = result[1] = result[2] = result[3]
				1369	= (GLfloat) _mesa_sin(a[0]);
				1370	store_vector4(inst, machine, result);
				1371	}
				1372	break;
				1373	case OPCODE_SLE: /* set on less or equal */
				1374	{
				1375	GLfloat a[4], b[4], result[4];
				1376	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1377	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1378	result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
				1379	result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
				1380	result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
				1381	result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
				1382	store_vector4(inst, machine, result);
				1383	}
				1384	break;
				1385	case OPCODE_SLT: /* set on less */
				1386	{
				1387	GLfloat a[4], b[4], result[4];
				1388	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1389	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1390	result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
				1391	result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
				1392	result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
				1393	result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
				1394	store_vector4(inst, machine, result);
				1395	}
				1396	break;
				1397	case OPCODE_SNE: /* set on not equal */
				1398	{
				1399	GLfloat a[4], b[4], result[4];
				1400	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1401	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1402	result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
				1403	result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
				1404	result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
				1405	result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
				1406	store_vector4(inst, machine, result);
				1407	}
				1408	break;
				1409	case OPCODE_STR: /* set true, operands ignored */
				1410	{
				1411	static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
				1412	store_vector4(inst, machine, result);
				1413	}
				1414	break;
				1415	case OPCODE_SUB:
				1416	{
				1417	GLfloat a[4], b[4], result[4];
				1418	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1419	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1420	result[0] = a[0] - b[0];
				1421	result[1] = a[1] - b[1];
				1422	result[2] = a[2] - b[2];
				1423	result[3] = a[3] - b[3];
				1424	store_vector4(inst, machine, result);
				1425	if (DEBUG_PROG) {
				1426	printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
				1427	result[0], result[1], result[2], result[3],
				1428	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1429	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1430	}
				1431	break;
				1432	case OPCODE_SWZ: /* extended swizzle */
				1433	{
				1434	const struct prog_src_register *source = &inst->SrcReg[0];
				1435	const GLfloat *src = get_register_pointer(ctx, source, machine);
				1436	GLfloat result[4];
				1437	GLuint i;
				1438	for (i = 0; i < 4; i++) {
				1439	const GLuint swz = GET_SWZ(source->Swizzle, i);
				1440	if (swz == SWIZZLE_ZERO)
				1441	result[i] = 0.0;
				1442	else if (swz == SWIZZLE_ONE)
				1443	result[i] = 1.0;
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1444	else {
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1445	ASSERT(swz >= 0);
				1446	ASSERT(swz <= 3);
				1447	result[i] = src[swz];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1448	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1449	if (source->NegateBase & (1 << i))
				1450	result[i] = -result[i];
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1451	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1452	store_vector4(inst, machine, result);
				1453	}
				1454	break;
				1455	case OPCODE_TEX: /* Both ARB and NV frag prog */
				1456	/* Texel lookup */
				1457	{
				1458	/* Note: only use the precomputed lambda value when we're
				1459	* sampling texture unit [K] with texcoord[K].
				1460	* Otherwise, the lambda value may have no relation to the
				1461	* instruction's texcoord or texture image. Using the wrong
				1462	* lambda is usually bad news.
				1463	* The rest of the time, just use zero (until we get a more
				1464	* sophisticated way of computing lambda).
				1465	*/
				1466	GLfloat coord[4], color[4], lambda;
				1467	#if 0
				1468	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1469	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1470	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1471	else
				1472	#endif
				1473	lambda = 0.0;
				1474	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
				1475	machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
				1476	color);
				1477	if (DEBUG_PROG) {
				1478	printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
				1479	"lod %f\n",
				1480	color[0], color[1], color[2], color[3],
				1481	inst->TexSrcUnit,
				1482	coord[0], coord[1], coord[2], coord[3], lambda);
				1483	}
				1484	store_vector4(inst, machine, color);
				1485	}
				1486	break;
				1487	case OPCODE_TXB: /* GL_ARB_fragment_program only */
				1488	/* Texel lookup with LOD bias */
				1489	{
				1490	const struct gl_texture_unit *texUnit
				1491	= &ctx->Texture.Unit[inst->TexSrcUnit];
				1492	GLfloat coord[4], color[4], lambda, bias;
				1493	#if 0
				1494	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1495	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1496	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1497	else
				1498	#endif
				1499	lambda = 0.0;
				1500	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
				1501	/* coord[3] is the bias to add to lambda */
				1502	bias = texUnit->LodBias + coord[3];
				1503	if (texUnit->_Current)
				1504	bias += texUnit->_Current->LodBias;
				1505	machine->FetchTexelLod(ctx, coord, lambda + bias,
				1506	inst->TexSrcUnit, color);
				1507	store_vector4(inst, machine, color);
				1508	}
				1509	break;
				1510	case OPCODE_TXD: /* GL_NV_fragment_program only */
				1511	/* Texture lookup w/ partial derivatives for LOD */
				1512	{
				1513	GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
				1514	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1515	fetch_vector4(ctx, &inst->SrcReg[1], machine, dtdx);
				1516	fetch_vector4(ctx, &inst->SrcReg[2], machine, dtdy);
				1517	machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
				1518	inst->TexSrcUnit, color);
				1519	store_vector4(inst, machine, color);
				1520	}
				1521	break;
				1522	case OPCODE_TXP: /* GL_ARB_fragment_program only */
				1523	/* Texture lookup w/ projective divide */
				1524	{
				1525	GLfloat texcoord[4], color[4], lambda;
				1526	#if 0
				1527	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1528	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1529	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1530	else
				1531	#endif
				1532	lambda = 0.0;
				1533	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1534	/* Not so sure about this test - if texcoord[3] is
				1535	* zero, we'd probably be fine except for an ASSERT in
				1536	* IROUND_POS() which gets triggered by the inf values created.
				1537	*/
				1538	if (texcoord[3] != 0.0) {
				1539	texcoord[0] /= texcoord[3];
				1540	texcoord[1] /= texcoord[3];
				1541	texcoord[2] /= texcoord[3];
				1542	}
				1543	machine->FetchTexelLod(ctx, texcoord, lambda,
				1544	inst->TexSrcUnit, color);
				1545	store_vector4(inst, machine, color);
				1546	}
				1547	break;
				1548	case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
				1549	/* Texture lookup w/ projective divide */
				1550	{
				1551	GLfloat texcoord[4], color[4], lambda;
				1552	#if 0
				1553	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1554	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
				1555	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1556	else
				1557	#endif
				1558	lambda = 0.0;
				1559	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
				1560	if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
				1561	texcoord[3] != 0.0) {
				1562	texcoord[0] /= texcoord[3];
				1563	texcoord[1] /= texcoord[3];
				1564	texcoord[2] /= texcoord[3];
				1565	}
				1566	machine->FetchTexelLod(ctx, texcoord, lambda,
				1567	inst->TexSrcUnit, color);
				1568	store_vector4(inst, machine, color);
				1569	}
				1570	break;
				1571	case OPCODE_UP2H: /* unpack two 16-bit floats */
				1572	{
				1573	GLfloat a[4], result[4];
				1574	const GLuint rawBits = (const GLuint ) a;
				1575	GLhalfNV hx, hy;
				1576	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1577	hx = rawBits[0] & 0xffff;
				1578	hy = rawBits[0] >> 16;
				1579	result[0] = result[2] = _mesa_half_to_float(hx);
				1580	result[1] = result[3] = _mesa_half_to_float(hy);
				1581	store_vector4(inst, machine, result);
				1582	}
				1583	break;
				1584	case OPCODE_UP2US: /* unpack two GLushorts */
				1585	{
				1586	GLfloat a[4], result[4];
				1587	const GLuint rawBits = (const GLuint ) a;
				1588	GLushort usx, usy;
				1589	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1590	usx = rawBits[0] & 0xffff;
				1591	usy = rawBits[0] >> 16;
				1592	result[0] = result[2] = usx * (1.0f / 65535.0f);
				1593	result[1] = result[3] = usy * (1.0f / 65535.0f);
				1594	store_vector4(inst, machine, result);
				1595	}
				1596	break;
				1597	case OPCODE_UP4B: /* unpack four GLbytes */
				1598	{
				1599	GLfloat a[4], result[4];
				1600	const GLuint rawBits = (const GLuint ) a;
				1601	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1602	result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
				1603	result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
				1604	result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
				1605	result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
				1606	store_vector4(inst, machine, result);
				1607	}
				1608	break;
				1609	case OPCODE_UP4UB: /* unpack four GLubytes */
				1610	{
				1611	GLfloat a[4], result[4];
				1612	const GLuint rawBits = (const GLuint ) a;
				1613	fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
				1614	result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
				1615	result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
				1616	result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
				1617	result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
				1618	store_vector4(inst, machine, result);
				1619	}
				1620	break;
				1621	case OPCODE_XPD: /* cross product */
				1622	{
				1623	GLfloat a[4], b[4], result[4];
				1624	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1625	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1626	result[0] = a[1] * b[2] - a[2] * b[1];
				1627	result[1] = a[2] * b[0] - a[0] * b[2];
				1628	result[2] = a[0] * b[1] - a[1] * b[0];
				1629	result[3] = 1.0;
				1630	store_vector4(inst, machine, result);
				1631	}
				1632	break;
				1633	case OPCODE_X2D: /* 2-D matrix transform */
				1634	{
				1635	GLfloat a[4], b[4], c[4], result[4];
				1636	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1637	fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
				1638	fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
				1639	result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
				1640	result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
				1641	result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
				1642	result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
				1643	store_vector4(inst, machine, result);
				1644	}
				1645	break;
				1646	case OPCODE_PRINT:
				1647	{
				1648	if (inst->SrcReg[0].File != -1) {
				1649	GLfloat a[4];
				1650	fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
				1651	_mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
				1652	a[0], a[1], a[2], a[3]);
				1653	}
				1654	else {
				1655	_mesa_printf("%s\n", (const char *) inst->Data);
				1656	}
				1657	}
				1658	break;
				1659	case OPCODE_END:
				1660	return GL_TRUE;
				1661	default:
				1662	_mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
				1663	inst->Opcode);
				1664	return GL_TRUE; /* return value doesn't matter */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1665
				1666	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1667
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1668	total++;
				1669	if (total > MAX_EXEC) {
				1670	_mesa_problem(ctx, "Infinite loop detected in fragment program");
				1671	return GL_TRUE;
				1672	abort();
				1673	}
Brian	e80d901	2007-02-23 16:53:24 -0700	[diff] [blame]	1674
				1675	} /* for pc */
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1676
				1677	#if FEATURE_MESA_program_debug
				1678	CurrentMachine = NULL;
				1679	#endif
				1680
				1681	return GL_TRUE;
				1682	}