Blame - src/mesa/shader/prog_execute.c - platform/external/mesa3d

blob: d75a55b95b1191bd4af22b0ee34a79b4142ee8b4 [file] [log] [blame]

Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
				3	* Version: 6.5.3
				4	*
				5	* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a
				8	* copy of this software and associated documentation files (the "Software"),
				9	* to deal in the Software without restriction, including without limitation
				10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				11	* and/or sell copies of the Software, and to permit persons to whom the
				12	* Software is furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice shall be included
				15	* in all copies or substantial portions of the Software.
				16	*
				17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
				18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				20	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
				21	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				22	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				23	*/
				24
				25	/**
				26	* \file prog_execute.c
				27	* Software interpreter for vertex/fragment programs.
				28	* \author Brian Paul
				29	*/
				30
				31	/*
				32	* NOTE: we do everything in single-precision floating point; we don't
				33	* currently observe the single/half/fixed-precision qualifiers.
				34	*
				35	*/
				36
				37
				38	#include "glheader.h"
				39	#include "colormac.h"
				40	#include "context.h"
				41	#include "program.h"
				42	#include "prog_execute.h"
				43	#include "prog_instruction.h"
				44	#include "prog_parameter.h"
				45	#include "prog_print.h"
				46	#include "slang_library_noise.h"
				47
				48
				49	/* See comments below for info about this */
				50	#define LAMBDA_ZERO 1
				51
				52	/* debug predicate */
				53	#define DEBUG_PROG 0
				54
				55
				56	#if FEATURE_MESA_program_debug
				57	static struct gl_program_machine *CurrentMachine = NULL;
				58
				59	/**
				60	* For GL_MESA_program_debug.
				61	* Return current value (4*GLfloat) of a program register.
				62	* Called via ctx->Driver.GetFragmentProgramRegister().
				63	*/
				64	void
				65	_mesa_get_program_register(GLcontext *ctx, enum register_file file,
				66	GLuint index, GLfloat val[4])
				67	{
				68	if (CurrentMachine) {
				69	switch (file) {
				70	case PROGRAM_INPUT:
				71	if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
				72	COPY_4V(val, CurrentMachine->VertAttribs[index]);
				73	}
				74	else {
				75	COPY_4V(val,
				76	CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
				77	}
				78	break;
				79	case PROGRAM_OUTPUT:
				80	COPY_4V(val, CurrentMachine->Outputs[index]);
				81	break;
				82	case PROGRAM_TEMPORARY:
				83	COPY_4V(val, CurrentMachine->Temporaries[index]);
				84	break;
				85	default:
				86	_mesa_problem(NULL,
				87	"bad register file in _swrast_get_program_register");
				88	}
				89	}
				90	}
				91	#endif /* FEATURE_MESA_program_debug */
				92
				93
				94
				95	/**
				96	* Return a pointer to the 4-element float vector specified by the given
				97	* source register.
				98	*/
				99	static INLINE const GLfloat *
				100	get_register_pointer( GLcontext *ctx,
				101	const struct prog_src_register *source,
				102	const struct gl_program_machine *machine)
				103	{
				104	/* XXX relative addressing... */
				105	switch (source->File) {
				106	case PROGRAM_TEMPORARY:
				107	ASSERT(source->Index < MAX_PROGRAM_TEMPS);
				108	return machine->Temporaries[source->Index];
				109
				110	case PROGRAM_INPUT:
				111	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
				112	ASSERT(source->Index < VERT_ATTRIB_MAX);
				113	return machine->VertAttribs[source->Index];
				114	}
				115	else {
				116	ASSERT(source->Index < FRAG_ATTRIB_MAX);
				117	return machine->Attribs[source->Index][machine->CurElement];
				118	}
				119
				120	case PROGRAM_OUTPUT:
				121	/* This is only for PRINT */
				122	ASSERT(source->Index < FRAG_RESULT_MAX);
				123	return machine->Outputs[source->Index];
				124
				125	case PROGRAM_LOCAL_PARAM:
				126	ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
				127	return machine->CurProgram->LocalParams[source->Index];
				128
				129	case PROGRAM_ENV_PARAM:
				130	ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
				131	if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
				132	return ctx->VertexProgram.Parameters[source->Index];
				133	else
				134	return ctx->FragmentProgram.Parameters[source->Index];
				135
				136	case PROGRAM_STATE_VAR:
				137	/* Fallthrough */
				138	case PROGRAM_CONSTANT:
				139	/* Fallthrough */
				140	case PROGRAM_UNIFORM:
				141	/* Fallthrough */
				142	case PROGRAM_NAMED_PARAM:
				143	ASSERT(source->Index <
				144	(GLint) machine->CurProgram->Parameters->NumParameters);
				145	return machine->CurProgram->Parameters->ParameterValues[source->Index];
				146
				147	default:
				148	_mesa_problem(ctx,
				149	"Invalid input register file %d in get_register_pointer()",
				150	source->File);
				151	return NULL;
				152	}
				153	}
				154
				155
				156	/**
				157	* Fetch a 4-element float vector from the given source register.
				158	* Apply swizzling and negating as needed.
				159	*/
				160	static void
				161	fetch_vector4( GLcontext *ctx,
				162	const struct prog_src_register *source,
				163	const struct gl_program_machine *machine,
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	164	GLfloat result[4] )
				165	{
				166	const GLfloat *src = get_register_pointer(ctx, source, machine);
				167	ASSERT(src);
				168
				169	if (source->Swizzle == SWIZZLE_NOOP) {
				170	/* no swizzling */
				171	COPY_4V(result, src);
				172	}
				173	else {
				174	ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
				175	ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
				176	ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
				177	ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
				178	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				179	result[1] = src[GET_SWZ(source->Swizzle, 1)];
				180	result[2] = src[GET_SWZ(source->Swizzle, 2)];
				181	result[3] = src[GET_SWZ(source->Swizzle, 3)];
				182	}
				183
				184	if (source->NegateBase) {
				185	result[0] = -result[0];
				186	result[1] = -result[1];
				187	result[2] = -result[2];
				188	result[3] = -result[3];
				189	}
				190	if (source->Abs) {
				191	result[0] = FABSF(result[0]);
				192	result[1] = FABSF(result[1]);
				193	result[2] = FABSF(result[2]);
				194	result[3] = FABSF(result[3]);
				195	}
				196	if (source->NegateAbs) {
				197	result[0] = -result[0];
				198	result[1] = -result[1];
				199	result[2] = -result[2];
				200	result[3] = -result[3];
				201	}
				202	}
				203
				#if 0
				/**
				 * Fetch the derivative of a fragment input register with respect to
				 * X or Y, then apply the source swizzle and modifiers.
				 * (Currently compiled out.)
				 *
				 * \param xOrY    'X' or 'Y'; selects span->attrStepX or span->attrStepY
				 * \param column  pixel column, used only for the X derivative of
				 *                texcoords/varyings
				 * \param result  receives the four derivative components
				 * \return GL_TRUE if it was easily computed or GL_FALSE if we
				 * need to execute another instance of the program (ugh)!
				 * As written, this function always returns GL_TRUE.
				 */
				static GLboolean
				fetch_vector4_deriv( GLcontext *ctx,
				                     const struct prog_src_register *source,
				                     const SWspan *span,
				                     char xOrY, GLint column, GLfloat result[4] )
				{
				   const GLboolean inX = (xOrY == 'X');
				   GLfloat deriv[4];
				   GLuint i;

				   ASSERT(xOrY == 'X' || xOrY == 'Y');

				   switch (source->Index) {
				   case FRAG_ATTRIB_WPOS:
				      {
				         const GLfloat *step = inX ? span->attrStepX[FRAG_ATTRIB_WPOS]
				                                   : span->attrStepY[FRAG_ATTRIB_WPOS];
				         /* window position advances by one pixel along the chosen axis */
				         deriv[0] = inX ? 1.0 : 0.0;
				         deriv[1] = inX ? 0.0 : 1.0;
				         deriv[2] = step[2] / ctx->DrawBuffer->_DepthMaxF;
				         deriv[3] = step[3];
				      }
				      break;
				   case FRAG_ATTRIB_COL0:
				   case FRAG_ATTRIB_COL1:
				      {
				         const GLfloat *step = inX ? span->attrStepX[source->Index]
				                                   : span->attrStepY[source->Index];
				         for (i = 0; i < 4; i++)
				            deriv[i] = step[i] * (1.0F / CHAN_MAXF);
				      }
				      break;
				   case FRAG_ATTRIB_FOGC:
				      {
				         const GLfloat *step = inX ? span->attrStepX[FRAG_ATTRIB_FOGC]
				                                   : span->attrStepY[FRAG_ATTRIB_FOGC];
				         deriv[0] = step[0] * (1.0F / CHAN_MAXF);
				         deriv[1] = 0.0;
				         deriv[2] = 0.0;
				         deriv[3] = 0.0;
				      }
				      break;
				   default:
				      assert(source->Index < FRAG_ATTRIB_MAX);
				      /* texcoord or varying */
				      {
				         const GLfloat *start = span->attrStart[source->Index];
				         const GLfloat *step = inX ? span->attrStepX[source->Index]
				                                   : span->attrStepY[source->Index];
				         GLfloat invQ;
				         if (inX) {
				            /* this is a little tricky - I think I've got it right */
				            invQ = 1.0f / (start[3] + step[3] * column);
				         }
				         else {
				            /* Tricky, as above, but in Y direction */
				            invQ = 1.0f / (start[3] + step[3]);
				         }
				         for (i = 0; i < 4; i++)
				            deriv[i] = step[i] * invQ;
				      }
				      break;
				   }

				   /* swizzle, then negate / abs / negate in that order */
				   for (i = 0; i < 4; i++)
				      result[i] = deriv[GET_SWZ(source->Swizzle, i)];

				   if (source->NegateBase) {
				      for (i = 0; i < 4; i++)
				         result[i] = -result[i];
				   }
				   if (source->Abs) {
				      for (i = 0; i < 4; i++)
				         result[i] = FABSF(result[i]);
				   }
				   if (source->NegateAbs) {
				      for (i = 0; i < 4; i++)
				         result[i] = -result[i];
				   }
				   return GL_TRUE;
				}
				#endif
				316
				317
				318	/**
				319	* As above, but only return result[0] element.
				320	*/
				321	static void
				322	fetch_vector1( GLcontext *ctx,
				323	const struct prog_src_register *source,
				324	const struct gl_program_machine *machine,
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	325	GLfloat result[4] )
				326	{
				327	const GLfloat *src = get_register_pointer(ctx, source, machine);
				328	ASSERT(src);
				329
				330	result[0] = src[GET_SWZ(source->Swizzle, 0)];
				331
				332	if (source->NegateBase) {
				333	result[0] = -result[0];
				334	}
				335	if (source->Abs) {
				336	result[0] = FABSF(result[0]);
				337	}
				338	if (source->NegateAbs) {
				339	result[0] = -result[0];
				340	}
				341	}
				342
				343
				344	/**
				345	* Test value against zero and return GT, LT, EQ or UN if NaN.
				346	*/
				347	static INLINE GLuint
				348	generate_cc( float value )
				349	{
				350	if (value != value)
				351	return COND_UN; /* NaN */
				352	if (value > 0.0F)
				353	return COND_GT;
				354	if (value < 0.0F)
				355	return COND_LT;
				356	return COND_EQ;
				357	}
				358
				359
				360	/**
				361	* Test if the ccMaskRule is satisfied by the given condition code.
				362	* Used to mask destination writes according to the current condition code.
				363	*/
				364	static INLINE GLboolean
				365	test_cc(GLuint condCode, GLuint ccMaskRule)
				366	{
				367	switch (ccMaskRule) {
				368	case COND_EQ: return (condCode == COND_EQ);
				369	case COND_NE: return (condCode != COND_EQ);
				370	case COND_LT: return (condCode == COND_LT);
				371	case COND_GE: return (condCode == COND_GT \|\| condCode == COND_EQ);
				372	case COND_LE: return (condCode == COND_LT \|\| condCode == COND_EQ);
				373	case COND_GT: return (condCode == COND_GT);
				374	case COND_TR: return GL_TRUE;
				375	case COND_FL: return GL_FALSE;
				376	default: return GL_TRUE;
				377	}
				378	}
				379
				380
				381	/**
				382	* Evaluate the 4 condition codes against a predicate and return GL_TRUE
				383	* or GL_FALSE to indicate result.
				384	*/
				385	static INLINE GLboolean
				386	eval_condition(const struct gl_program_machine *machine,
				387	const struct prog_instruction *inst)
				388	{
				389	const GLuint swizzle = inst->DstReg.CondSwizzle;
				390	const GLuint condMask = inst->DstReg.CondMask;
				391	if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) \|\|
				392	test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) \|\|
				393	test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) \|\|
				394	test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
				395	return GL_TRUE;
				396	}
				397	else {
				398	return GL_FALSE;
				399	}
				400	}
				401
				402
				403
				404	/**
				405	* Store 4 floats into a register. Observe the instructions saturate and
				406	* set-condition-code flags.
				407	*/
				408	static void
				409	store_vector4( const struct prog_instruction *inst,
				410	struct gl_program_machine *machine,
				411	const GLfloat value[4] )
				412	{
				413	const struct prog_dst_register *dest = &(inst->DstReg);
				414	const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
				415	GLfloat *dstReg;
				416	GLfloat dummyReg[4];
				417	GLfloat clampedValue[4];
				418	GLuint writeMask = dest->WriteMask;
				419
				420	switch (dest->File) {
				421	case PROGRAM_OUTPUT:
				422	dstReg = machine->Outputs[dest->Index];
				423	break;
				424	case PROGRAM_TEMPORARY:
				425	dstReg = machine->Temporaries[dest->Index];
				426	break;
				427	case PROGRAM_WRITE_ONLY:
				428	dstReg = dummyReg;
				429	return;
				430	default:
				431	_mesa_problem(NULL, "bad register file in store_vector4(fp)");
				432	return;
				433	}
				434
				435	#if 0
				436	if (value[0] > 1.0e10 \|\|
				437	IS_INF_OR_NAN(value[0]) \|\|
				438	IS_INF_OR_NAN(value[1]) \|\|
				439	IS_INF_OR_NAN(value[2]) \|\|
				440	IS_INF_OR_NAN(value[3]) )
				441	printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
				442	#endif
				443
				444	if (clamp) {
				445	clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
				446	clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
				447	clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
				448	clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
				449	value = clampedValue;
				450	}
				451
				452	if (dest->CondMask != COND_TR) {
				453	/* condition codes may turn off some writes */
				454	if (writeMask & WRITEMASK_X) {
				455	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
				456	dest->CondMask))
				457	writeMask &= ~WRITEMASK_X;
				458	}
				459	if (writeMask & WRITEMASK_Y) {
				460	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
				461	dest->CondMask))
				462	writeMask &= ~WRITEMASK_Y;
				463	}
				464	if (writeMask & WRITEMASK_Z) {
				465	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
				466	dest->CondMask))
				467	writeMask &= ~WRITEMASK_Z;
				468	}
				469	if (writeMask & WRITEMASK_W) {
				470	if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
				471	dest->CondMask))
				472	writeMask &= ~WRITEMASK_W;
				473	}
				474	}
				475
				476	if (writeMask & WRITEMASK_X)
				477	dstReg[0] = value[0];
				478	if (writeMask & WRITEMASK_Y)
				479	dstReg[1] = value[1];
				480	if (writeMask & WRITEMASK_Z)
				481	dstReg[2] = value[2];
				482	if (writeMask & WRITEMASK_W)
				483	dstReg[3] = value[3];
				484
				485	if (inst->CondUpdate) {
				486	if (writeMask & WRITEMASK_X)
				487	machine->CondCodes[0] = generate_cc(value[0]);
				488	if (writeMask & WRITEMASK_Y)
				489	machine->CondCodes[1] = generate_cc(value[1]);
				490	if (writeMask & WRITEMASK_Z)
				491	machine->CondCodes[2] = generate_cc(value[2]);
				492	if (writeMask & WRITEMASK_W)
				493	machine->CondCodes[3] = generate_cc(value[3]);
				494	}
				495	}
				496
				497
				#if 0
				/**
				 * Initialize a new machine state instance from an existing one, adding
				 * the partial derivatives onto the input registers.
				 * Used to implement DDX and DDY instructions in non-trivial cases.
				 * (Currently compiled out.)
				 *
				 * \param ctx       rendering context (unused here)
				 * \param machine   machine state to copy from; not modified through this
				 *                  pointer
				 * \param program   fragment program; its InputsRead mask selects which
				 *                  attributes get a derivative step added
				 * \param span      source of the per-attribute X/Y step values
				 * \param xOrY      'X' or 'Y'; selects attrStepX or attrStepY
				 * \param dMachine  receives the copied and adjusted machine state
				 *
				 * NOTE(review): if Attribs is a pointer member, the copy shares attribute
				 * storage with \p machine and the additions below will still be visible
				 * through the original - confirm against the struct definition.
				 */
				static void
				init_machine_deriv( GLcontext *ctx,
				                    const struct gl_program_machine *machine,
				                    const struct gl_fragment_program *program,
				                    const SWspan *span, char xOrY,
				                    struct gl_program_machine *dMachine )
				{
				   const GLboolean inX = (xOrY == 'X');
				   GLuint attr;

				   ASSERT(xOrY == 'X' || xOrY == 'Y');

				   /* copy existing machine */
				   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

				   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
				      /* XXX also need to do this when using valgrind */
				      /* Clear temporary registers (undefined for ARB_f_p).
				       * This must clear the copy, not the caller's machine.
				       */
				      _mesa_bzero( (void*) dMachine->Temporaries,
				                   MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
				   }

				   /* Add derivatives */
				   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
				      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
				      const GLfloat *step = inX ? span->attrStepX[FRAG_ATTRIB_WPOS]
				                                : span->attrStepY[FRAG_ATTRIB_WPOS];
				      /* one pixel along the chosen axis, nothing along the other */
				      wpos[0] += inX ? 1.0F : 0.0F;
				      wpos[1] += inX ? 0.0F : 1.0F;
				      wpos[2] += step[2];
				      wpos[3] += step[3];
				   }

				   /* primary, secondary colors */
				   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
				      if (program->Base.InputsRead & (1 << attr)) {
				         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
				         const GLfloat *step = inX ? span->attrStepX[attr]
				                                   : span->attrStepY[attr];
				         col[0] += step[0] * (1.0F / CHAN_MAXF);
				         col[1] += step[1] * (1.0F / CHAN_MAXF);
				         col[2] += step[2] * (1.0F / CHAN_MAXF);
				         col[3] += step[3] * (1.0F / CHAN_MAXF);
				      }
				   }
				   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
				      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
				      const GLfloat *step = inX ? span->attrStepX[FRAG_ATTRIB_FOGC]
				                                : span->attrStepY[FRAG_ATTRIB_FOGC];
				      fogc[0] += step[0];
				   }
				   /* texcoord and varying vars */
				   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
				      if (program->Base.InputsRead & (1 << attr)) {
				         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
				         const GLfloat *step = inX ? span->attrStepX[attr]
				                                   : span->attrStepY[attr];
				         /* XXX perspective-correct interpolation */
				         val[0] += step[0];
				         val[1] += step[1];
				         val[2] += step[2];
				         val[3] += step[3];
				      }
				   }

				   /* init condition codes */
				   dMachine->CondCodes[0] = COND_EQ;
				   dMachine->CondCodes[1] = COND_EQ;
				   dMachine->CondCodes[2] = COND_EQ;
				   dMachine->CondCodes[3] = COND_EQ;
				}
				#endif
				596
				597
				598	/**
				599	* Execute the given vertex/fragment program.
				600	*
				601	* \param ctx - rendering context
				602	* \param program - the vertex or fragment program to execute
				603	* \param machine - machine state (register file)
				604	* \param maxInst - max number of instructions to execute
				605	* \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
				606	*/
				607	GLboolean
				608	_mesa_execute_program(GLcontext *ctx,
				609	const struct gl_program *program, GLuint maxInst,
				610	struct gl_program_machine *machine, GLuint element)
				611	{
				612	const GLuint MAX_EXEC = 10000;
				613	GLint pc, total = 0;
				614
				615	machine->CurProgram = program;
				616
				617	if (DEBUG_PROG) {
				618	printf("execute program %u --------------------\n", program->Id);
				619	}
				620
				621	#if FEATURE_MESA_program_debug
				622	CurrentMachine = machine;
				623	#endif
				624
				625	for (pc = 0; pc < maxInst; pc++) {
				626	const struct prog_instruction *inst = program->Instructions + pc;
				627
				628	#if FEATURE_MESA_program_debug
				629	if (ctx->FragmentProgram.CallbackEnabled &&
				630	ctx->FragmentProgram.Callback) {
				631	ctx->FragmentProgram.CurrentPosition = inst->StringPos;
				632	ctx->FragmentProgram.Callback(program->Target,
				633	ctx->FragmentProgram.CallbackData);
				634	}
				635	#endif
				636
				637	if (DEBUG_PROG) {
				638	_mesa_print_instruction(inst);
				639	}
				640
				641	switch (inst->Opcode) {
				642	case OPCODE_ABS:
				643	{
				644	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	645	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	646	result[0] = FABSF(a[0]);
				647	result[1] = FABSF(a[1]);
				648	result[2] = FABSF(a[2]);
				649	result[3] = FABSF(a[3]);
				650	store_vector4( inst, machine, result );
				651	}
				652	break;
				653	case OPCODE_ADD:
				654	{
				655	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	656	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				657	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	658	result[0] = a[0] + b[0];
				659	result[1] = a[1] + b[1];
				660	result[2] = a[2] + b[2];
				661	result[3] = a[3] + b[3];
				662	store_vector4( inst, machine, result );
				663	if (DEBUG_PROG) {
				664	printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
				665	result[0], result[1], result[2], result[3],
				666	a[0], a[1], a[2], a[3],
				667	b[0], b[1], b[2], b[3]);
				668	}
				669	}
				670	break;
				671	case OPCODE_BGNLOOP:
				672	/* no-op */
				673	break;
				674	case OPCODE_ENDLOOP:
				675	/* subtract 1 here since pc is incremented by for(pc) loop */
				676	pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
				677	break;
				678	case OPCODE_BGNSUB: /* begin subroutine */
				679	break;
				680	case OPCODE_ENDSUB: /* end subroutine */
				681	break;
				682	case OPCODE_BRA: /* branch (conditional) */
				683	/* fall-through */
				684	case OPCODE_BRK: /* break out of loop (conditional) */
				685	/* fall-through */
				686	case OPCODE_CONT: /* continue loop (conditional) */
				687	if (eval_condition(machine, inst)) {
				688	/* take branch */
				689	/* Subtract 1 here since we'll do pc++ at end of for-loop */
				690	pc = inst->BranchTarget - 1;
				691	}
				692	break;
				693	case OPCODE_CAL: /* Call subroutine (conditional) */
				694	if (eval_condition(machine, inst)) {
				695	/* call the subroutine */
				696	if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
				697	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
				698	}
				699	machine->CallStack[machine->StackDepth++] = pc + 1;
				700	pc = inst->BranchTarget; /* XXX - 1 ??? */
				701	}
				702	break;
				703	case OPCODE_CMP:
				704	{
				705	GLfloat a[4], b[4], c[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	706	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				707	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
				708	fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	709	result[0] = a[0] < 0.0F ? b[0] : c[0];
				710	result[1] = a[1] < 0.0F ? b[1] : c[1];
				711	result[2] = a[2] < 0.0F ? b[2] : c[2];
				712	result[3] = a[3] < 0.0F ? b[3] : c[3];
				713	store_vector4( inst, machine, result );
				714	}
				715	break;
				716	case OPCODE_COS:
				717	{
				718	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	719	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	720	result[0] = result[1] = result[2] = result[3]
				721	= (GLfloat) _mesa_cos(a[0]);
				722	store_vector4( inst, machine, result );
				723	}
				724	break;
				725	case OPCODE_DDX: /* Partial derivative with respect to X */
				726	{
				727	#if 0
				728	GLfloat a[4], aNext[4], result[4];
				729	struct gl_program_machine dMachine;
				730	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
				731	column, result)) {
				732	/* This is tricky. Make a copy of the current machine state,
				733	* increment the input registers by the dx or dy partial
				734	* derivatives, then re-execute the program up to the
				735	* preceeding instruction, then fetch the source register.
				736	* Finally, find the difference in the register values for
				737	* the original and derivative runs.
				738	*/
				739	fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
				740	init_machine_deriv(ctx, machine, program, span,
				741	'X', &dMachine);
				742	execute_program(ctx, program, pc, &dMachine, span, column);
				743	fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
				744	result[0] = aNext[0] - a[0];
				745	result[1] = aNext[1] - a[1];
				746	result[2] = aNext[2] - a[2];
				747	result[3] = aNext[3] - a[3];
				748	}
				749	store_vector4( inst, machine, result );
				750	#else
				751	static const GLfloat result[4] = { 0, 0, 0, 0 };
				752	store_vector4( inst, machine, result );
				753	#endif
				754	}
				755	break;
				756	case OPCODE_DDY: /* Partial derivative with respect to Y */
				757	{
				758	#if 0
				759	GLfloat a[4], aNext[4], result[4];
				760	struct gl_program_machine dMachine;
				761	if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
				762	column, result)) {
				763	init_machine_deriv(ctx, machine, program, span,
				764	'Y', &dMachine);
				765	fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
				766	execute_program(ctx, program, pc, &dMachine, span, column);
				767	fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
				768	result[0] = aNext[0] - a[0];
				769	result[1] = aNext[1] - a[1];
				770	result[2] = aNext[2] - a[2];
				771	result[3] = aNext[3] - a[3];
				772	}
				773	store_vector4( inst, machine, result );
				774	#else
				775	static const GLfloat result[4] = { 0, 0, 0, 0 };
				776	store_vector4( inst, machine, result );
				777	#endif
				778	}
				779	break;
				780	case OPCODE_DP3:
				781	{
				782	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	783	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				784	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	785	result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
				786	store_vector4( inst, machine, result );
				787	if (DEBUG_PROG) {
				788	printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
				789	result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
				790	}
				791	}
				792	break;
				793	case OPCODE_DP4:
				794	{
				795	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	796	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				797	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	798	result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
				799	store_vector4( inst, machine, result );
				800	if (DEBUG_PROG) {
				801	printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
				802	result[0], a[0], a[1], a[2], a[3],
				803	b[0], b[1], b[2], b[3]);
				804	}
				805	}
				806	break;
				807	case OPCODE_DPH:
				808	{
				809	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	810	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				811	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	812	result[0] = result[1] = result[2] = result[3] =
				813	a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
				814	store_vector4( inst, machine, result );
				815	}
				816	break;
				817	case OPCODE_DST: /* Distance vector */
				818	{
				819	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	820	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				821	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	822	result[0] = 1.0F;
				823	result[1] = a[1] * b[1];
				824	result[2] = a[2];
				825	result[3] = b[3];
				826	store_vector4( inst, machine, result );
				827	}
				828	break;
				829	case OPCODE_EX2: /* Exponential base 2 */
				830	{
				831	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	832	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	833	result[0] = result[1] = result[2] = result[3] =
				834	(GLfloat) _mesa_pow(2.0, a[0]);
				835	store_vector4( inst, machine, result );
				836	}
				837	break;
				838	case OPCODE_FLR:
				839	{
				840	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	841	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	842	result[0] = FLOORF(a[0]);
				843	result[1] = FLOORF(a[1]);
				844	result[2] = FLOORF(a[2]);
				845	result[3] = FLOORF(a[3]);
				846	store_vector4( inst, machine, result );
				847	}
				848	break;
				849	case OPCODE_FRC:
				850	{
				851	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	852	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	853	result[0] = a[0] - FLOORF(a[0]);
				854	result[1] = a[1] - FLOORF(a[1]);
				855	result[2] = a[2] - FLOORF(a[2]);
				856	result[3] = a[3] - FLOORF(a[3]);
				857	store_vector4( inst, machine, result );
				858	}
				859	break;
				860	case OPCODE_IF:
				861	if (eval_condition(machine, inst)) {
				862	/* do if-clause (just continue execution) */
				863	}
				864	else {
				865	/* go to the instruction after ELSE or ENDIF */
				866	assert(inst->BranchTarget >= 0);
				867	pc = inst->BranchTarget - 1;
				868	}
				869	break;
				870	case OPCODE_ELSE:
				871	/* goto ENDIF */
				872	assert(inst->BranchTarget >= 0);
				873	pc = inst->BranchTarget - 1;
				874	break;
				875	case OPCODE_ENDIF:
				876	/* nothing */
				877	break;
				878	case OPCODE_INT: /* float to int */
				879	{
				880	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	881	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	882	result[0] = (GLfloat) (GLint) a[0];
				883	result[1] = (GLfloat) (GLint) a[1];
				884	result[2] = (GLfloat) (GLint) a[2];
				885	result[3] = (GLfloat) (GLint) a[3];
				886	store_vector4( inst, machine, result );
				887	}
				888	break;
				889	case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
				890	if (eval_condition(machine, inst)) {
				891	return GL_FALSE;
				892	}
				893	break;
				894	case OPCODE_KIL: /* ARB_f_p only */
				895	{
				896	GLfloat a[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	897	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	898	if (a[0] < 0.0F \|\| a[1] < 0.0F \|\| a[2] < 0.0F \|\| a[3] < 0.0F) {
				899	return GL_FALSE;
				900	}
				901	}
				902	break;
				903	case OPCODE_LG2: /* log base 2 */
				904	{
				905	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	906	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	907	result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
				908	store_vector4( inst, machine, result );
				909	}
				910	break;
				911	case OPCODE_LIT:
				912	{
				913	const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
				914	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	915	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	916	a[0] = MAX2(a[0], 0.0F);
				917	a[1] = MAX2(a[1], 0.0F);
				918	/* XXX ARB version clamps a[3], NV version doesn't */
				919	a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
				920	result[0] = 1.0F;
				921	result[1] = a[0];
				922	/* XXX we could probably just use pow() here */
				923	if (a[0] > 0.0F) {
				924	if (a[1] == 0.0 && a[3] == 0.0)
				925	result[2] = 1.0;
				926	else
				927	result[2] = EXPF(a[3] * LOGF(a[1]));
				928	}
				929	else {
				930	result[2] = 0.0;
				931	}
				932	result[3] = 1.0F;
				933	store_vector4( inst, machine, result );
				934	if (DEBUG_PROG) {
				935	printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
				936	result[0], result[1], result[2], result[3],
				937	a[0], a[1], a[2], a[3]);
				938	}
				939	}
				940	break;
				941	case OPCODE_LRP:
				942	{
				943	GLfloat a[4], b[4], c[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	944	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				945	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
				946	fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	947	result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
				948	result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
				949	result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
				950	result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
				951	store_vector4( inst, machine, result );
				952	if (DEBUG_PROG) {
				953	printf("LRP (%g %g %g %g) = (%g %g %g %g), "
				954	"(%g %g %g %g), (%g %g %g %g)\n",
				955	result[0], result[1], result[2], result[3],
				956	a[0], a[1], a[2], a[3],
				957	b[0], b[1], b[2], b[3],
				958	c[0], c[1], c[2], c[3]);
				959	}
				960	}
				961	break;
				962	case OPCODE_MAD:
				963	{
				964	GLfloat a[4], b[4], c[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	965	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				966	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
				967	fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	968	result[0] = a[0] * b[0] + c[0];
				969	result[1] = a[1] * b[1] + c[1];
				970	result[2] = a[2] * b[2] + c[2];
				971	result[3] = a[3] * b[3] + c[3];
				972	store_vector4( inst, machine, result );
				973	if (DEBUG_PROG) {
				974	printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
				975	"(%g %g %g %g) + (%g %g %g %g)\n",
				976	result[0], result[1], result[2], result[3],
				977	a[0], a[1], a[2], a[3],
				978	b[0], b[1], b[2], b[3],
				979	c[0], c[1], c[2], c[3]);
				980	}
				981	}
				982	break;
				983	case OPCODE_MAX:
				984	{
				985	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	986	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				987	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	988	result[0] = MAX2(a[0], b[0]);
				989	result[1] = MAX2(a[1], b[1]);
				990	result[2] = MAX2(a[2], b[2]);
				991	result[3] = MAX2(a[3], b[3]);
				992	store_vector4( inst, machine, result );
				993	if (DEBUG_PROG) {
				994	printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
				995	result[0], result[1], result[2], result[3],
				996	a[0], a[1], a[2], a[3],
				997	b[0], b[1], b[2], b[3]);
				998	}
				999	}
				1000	break;
				1001	case OPCODE_MIN:
				1002	{
				1003	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1004	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1005	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1006	result[0] = MIN2(a[0], b[0]);
				1007	result[1] = MIN2(a[1], b[1]);
				1008	result[2] = MIN2(a[2], b[2]);
				1009	result[3] = MIN2(a[3], b[3]);
				1010	store_vector4( inst, machine, result );
				1011	}
				1012	break;
				1013	case OPCODE_MOV:
				1014	{
				1015	GLfloat result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1016	fetch_vector4( ctx, &inst->SrcReg[0], machine, result );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1017	store_vector4( inst, machine, result );
				1018	if (DEBUG_PROG) {
				1019	printf("MOV (%g %g %g %g)\n",
				1020	result[0], result[1], result[2], result[3]);
				1021	}
				1022	}
				1023	break;
				1024	case OPCODE_MUL:
				1025	{
				1026	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1027	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1028	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1029	result[0] = a[0] * b[0];
				1030	result[1] = a[1] * b[1];
				1031	result[2] = a[2] * b[2];
				1032	result[3] = a[3] * b[3];
				1033	store_vector4( inst, machine, result );
				1034	if (DEBUG_PROG) {
				1035	printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
				1036	result[0], result[1], result[2], result[3],
				1037	a[0], a[1], a[2], a[3],
				1038	b[0], b[1], b[2], b[3]);
				1039	}
				1040	}
				1041	break;
				1042	case OPCODE_NOISE1:
				1043	{
				1044	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1045	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1046	result[0] =
				1047	result[1] =
				1048	result[2] =
				1049	result[3] = _slang_library_noise1(a[0]);
				1050	store_vector4( inst, machine, result );
				1051	}
				1052	break;
				1053	case OPCODE_NOISE2:
				1054	{
				1055	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1056	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1057	result[0] =
				1058	result[1] =
				1059	result[2] =
				1060	result[3] = _slang_library_noise2(a[0], a[1]);
				1061	store_vector4( inst, machine, result );
				1062	}
				1063	break;
				1064	case OPCODE_NOISE3:
				1065	{
				1066	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1067	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1068	result[0] =
				1069	result[1] =
				1070	result[2] =
				1071	result[3] = _slang_library_noise3(a[0], a[1], a[2]);
				1072	store_vector4( inst, machine, result );
				1073	}
				1074	break;
				1075	case OPCODE_NOISE4:
				1076	{
				1077	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1078	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1079	result[0] =
				1080	result[1] =
				1081	result[2] =
				1082	result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
				1083	store_vector4( inst, machine, result );
				1084	}
				1085	break;
				1086	case OPCODE_NOP:
				1087	break;
				1088	case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
				1089	{
				1090	GLfloat a[4], result[4];
				1091	GLhalfNV hx, hy;
				1092	GLuint rawResult = (GLuint ) result;
				1093	GLuint twoHalves;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1094	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1095	hx = _mesa_float_to_half(a[0]);
				1096	hy = _mesa_float_to_half(a[1]);
				1097	twoHalves = hx \| (hy << 16);
				1098	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1099	= twoHalves;
				1100	store_vector4( inst, machine, result );
				1101	}
				1102	break;
				1103	case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
				1104	{
				1105	GLfloat a[4], result[4];
				1106	GLuint usx, usy, rawResult = (GLuint ) result;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1107	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1108	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1109	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1110	usx = IROUND(a[0] * 65535.0F);
				1111	usy = IROUND(a[1] * 65535.0F);
				1112	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1113	= usx \| (usy << 16);
				1114	store_vector4( inst, machine, result );
				1115	}
				1116	break;
				1117	case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
				1118	{
				1119	GLfloat a[4], result[4];
				1120	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1121	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1122	a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
				1123	a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
				1124	a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
				1125	a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
				1126	ubx = IROUND(127.0F * a[0] + 128.0F);
				1127	uby = IROUND(127.0F * a[1] + 128.0F);
				1128	ubz = IROUND(127.0F * a[2] + 128.0F);
				1129	ubw = IROUND(127.0F * a[3] + 128.0F);
				1130	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1131	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1132	store_vector4( inst, machine, result );
				1133	}
				1134	break;
				1135	case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
				1136	{
				1137	GLfloat a[4], result[4];
				1138	GLuint ubx, uby, ubz, ubw, rawResult = (GLuint ) result;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1139	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1140	a[0] = CLAMP(a[0], 0.0F, 1.0F);
				1141	a[1] = CLAMP(a[1], 0.0F, 1.0F);
				1142	a[2] = CLAMP(a[2], 0.0F, 1.0F);
				1143	a[3] = CLAMP(a[3], 0.0F, 1.0F);
				1144	ubx = IROUND(255.0F * a[0]);
				1145	uby = IROUND(255.0F * a[1]);
				1146	ubz = IROUND(255.0F * a[2]);
				1147	ubw = IROUND(255.0F * a[3]);
				1148	rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
				1149	= ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
				1150	store_vector4( inst, machine, result );
				1151	}
				1152	break;
				1153	case OPCODE_POW:
				1154	{
				1155	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1156	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
				1157	fetch_vector1( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1158	result[0] = result[1] = result[2] = result[3]
				1159	= (GLfloat)_mesa_pow(a[0], b[0]);
				1160	store_vector4( inst, machine, result );
				1161	}
				1162	break;
				1163	case OPCODE_RCP:
				1164	{
				1165	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1166	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1167	if (DEBUG_PROG) {
				1168	if (a[0] == 0)
				1169	printf("RCP(0)\n");
				1170	else if (IS_INF_OR_NAN(a[0]))
				1171	printf("RCP(inf)\n");
				1172	}
				1173	result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
				1174	store_vector4( inst, machine, result );
				1175	}
				1176	break;
				1177	case OPCODE_RET: /* return from subroutine (conditional) */
				1178	if (eval_condition(machine, inst)) {
				1179	if (machine->StackDepth == 0) {
				1180	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
				1181	}
				1182	pc = machine->CallStack[--machine->StackDepth];
				1183	}
				1184	break;
				1185	case OPCODE_RFL: /* reflection vector */
				1186	{
				1187	GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1188	fetch_vector4( ctx, &inst->SrcReg[0], machine, axis );
				1189	fetch_vector4( ctx, &inst->SrcReg[1], machine, dir );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1190	tmpW = DOT3(axis, axis);
				1191	tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
				1192	result[0] = tmpX * axis[0] - dir[0];
				1193	result[1] = tmpX * axis[1] - dir[1];
				1194	result[2] = tmpX * axis[2] - dir[2];
				1195	/* result[3] is never written! XXX enforce in parser! */
				1196	store_vector4( inst, machine, result );
				1197	}
				1198	break;
				1199	case OPCODE_RSQ: /* 1 / sqrt() */
				1200	{
				1201	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1202	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1203	a[0] = FABSF(a[0]);
				1204	result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
				1205	store_vector4( inst, machine, result );
				1206	if (DEBUG_PROG) {
				1207	printf("RSQ %g = 1/sqrt(\|%g\|)\n", result[0], a[0]);
				1208	}
				1209	}
				1210	break;
				1211	case OPCODE_SCS: /* sine and cos */
				1212	{
				1213	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1214	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1215	result[0] = (GLfloat) _mesa_cos(a[0]);
				1216	result[1] = (GLfloat) _mesa_sin(a[0]);
				1217	result[2] = 0.0; /* undefined! */
				1218	result[3] = 0.0; /* undefined! */
				1219	store_vector4( inst, machine, result );
				1220	}
				1221	break;
				1222	case OPCODE_SEQ: /* set on equal */
				1223	{
				1224	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1225	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1226	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1227	result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
				1228	result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
				1229	result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
				1230	result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
				1231	store_vector4( inst, machine, result );
				1232	}
				1233	break;
				1234	case OPCODE_SFL: /* set false, operands ignored */
				1235	{
				1236	static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
				1237	store_vector4( inst, machine, result );
				1238	}
				1239	break;
				1240	case OPCODE_SGE: /* set on greater or equal */
				1241	{
				1242	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1243	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1244	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1245	result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
				1246	result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
				1247	result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
				1248	result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
				1249	store_vector4( inst, machine, result );
				1250	}
				1251	break;
				1252	case OPCODE_SGT: /* set on greater */
				1253	{
				1254	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1255	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1256	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1257	result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
				1258	result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
				1259	result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
				1260	result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
				1261	store_vector4( inst, machine, result );
				1262	if (DEBUG_PROG) {
				1263	printf("SGT %g %g %g %g\n",
				1264	result[0], result[1], result[2], result[3]);
				1265	}
				1266	}
				1267	break;
				1268	case OPCODE_SIN:
				1269	{
				1270	GLfloat a[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1271	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1272	result[0] = result[1] = result[2] = result[3]
				1273	= (GLfloat) _mesa_sin(a[0]);
				1274	store_vector4( inst, machine, result );
				1275	}
				1276	break;
				1277	case OPCODE_SLE: /* set on less or equal */
				1278	{
				1279	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1280	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1281	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1282	result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
				1283	result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
				1284	result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
				1285	result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
				1286	store_vector4( inst, machine, result );
				1287	}
				1288	break;
				1289	case OPCODE_SLT: /* set on less */
				1290	{
				1291	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1292	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1293	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1294	result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
				1295	result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
				1296	result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
				1297	result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
				1298	store_vector4( inst, machine, result );
				1299	}
				1300	break;
				1301	case OPCODE_SNE: /* set on not equal */
				1302	{
				1303	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1304	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1305	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1306	result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
				1307	result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
				1308	result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
				1309	result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
				1310	store_vector4( inst, machine, result );
				1311	}
				1312	break;
				1313	case OPCODE_STR: /* set true, operands ignored */
				1314	{
				1315	static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
				1316	store_vector4( inst, machine, result );
				1317	}
				1318	break;
				1319	case OPCODE_SUB:
				1320	{
				1321	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1322	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1323	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1324	result[0] = a[0] - b[0];
				1325	result[1] = a[1] - b[1];
				1326	result[2] = a[2] - b[2];
				1327	result[3] = a[3] - b[3];
				1328	store_vector4( inst, machine, result );
				1329	if (DEBUG_PROG) {
				1330	printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
				1331	result[0], result[1], result[2], result[3],
				1332	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
				1333	}
				1334	}
				1335	break;
				1336	case OPCODE_SWZ: /* extended swizzle */
				1337	{
				1338	const struct prog_src_register *source = &inst->SrcReg[0];
				1339	const GLfloat *src = get_register_pointer(ctx, source, machine);
				1340	GLfloat result[4];
				1341	GLuint i;
				1342	for (i = 0; i < 4; i++) {
				1343	const GLuint swz = GET_SWZ(source->Swizzle, i);
				1344	if (swz == SWIZZLE_ZERO)
				1345	result[i] = 0.0;
				1346	else if (swz == SWIZZLE_ONE)
				1347	result[i] = 1.0;
				1348	else {
				1349	ASSERT(swz >= 0);
				1350	ASSERT(swz <= 3);
				1351	result[i] = src[swz];
				1352	}
				1353	if (source->NegateBase & (1 << i))
				1354	result[i] = -result[i];
				1355	}
				1356	store_vector4( inst, machine, result );
				1357	}
				1358	break;
				1359	case OPCODE_TEX: /* Both ARB and NV frag prog */
				1360	/* Texel lookup */
				1361	{
				1362	/* Note: only use the precomputed lambda value when we're
				1363	* sampling texture unit [K] with texcoord[K].
				1364	* Otherwise, the lambda value may have no relation to the
				1365	* instruction's texcoord or texture image. Using the wrong
				1366	* lambda is usually bad news.
				1367	* The rest of the time, just use zero (until we get a more
				1368	* sophisticated way of computing lambda).
				1369	*/
				1370	GLfloat coord[4], color[4], lambda;
				1371	#if 0
				1372	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1373	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
				1374	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1375	else
				1376	#endif
				1377	lambda = 0.0;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1378	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1379	machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit, color);
				1380	if (DEBUG_PROG) {
				1381	printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
				1382	"lod %f\n",
				1383	color[0], color[1], color[2], color[3],
				1384	inst->TexSrcUnit,
				1385	coord[0], coord[1], coord[2], coord[3], lambda);
				1386	}
				1387	store_vector4( inst, machine, color );
				1388	}
				1389	break;
				1390	case OPCODE_TXB: /* GL_ARB_fragment_program only */
				1391	/* Texel lookup with LOD bias */
				1392	{
				1393	const struct gl_texture_unit *texUnit
				1394	= &ctx->Texture.Unit[inst->TexSrcUnit];
				1395	GLfloat coord[4], color[4], lambda, bias;
				1396	#if 0
				1397	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1398	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
				1399	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1400	else
				1401	#endif
				1402	lambda = 0.0;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1403	fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1404	/* coord[3] is the bias to add to lambda */
				1405	bias = texUnit->LodBias + coord[3];
				1406	if (texUnit->_Current)
				1407	bias += texUnit->_Current->LodBias;
				1408	machine->FetchTexelLod(ctx, coord, lambda + bias,
				1409	inst->TexSrcUnit, color);
				1410	store_vector4( inst, machine, color );
				1411	}
				1412	break;
				1413	case OPCODE_TXD: /* GL_NV_fragment_program only */
				1414	/* Texture lookup w/ partial derivatives for LOD */
				1415	{
				1416	GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1417	fetch_vector4( ctx, &inst->SrcReg[0], machine, texcoord );
				1418	fetch_vector4( ctx, &inst->SrcReg[1], machine, dtdx );
				1419	fetch_vector4( ctx, &inst->SrcReg[2], machine, dtdy );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1420	machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
				1421	inst->TexSrcUnit, color );
				1422	store_vector4( inst, machine, color );
				1423	}
				1424	break;
				1425	case OPCODE_TXP: /* GL_ARB_fragment_program only */
				1426	/* Texture lookup w/ projective divide */
				1427	{
				1428	GLfloat texcoord[4], color[4], lambda;
				1429	#if 0
				1430	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1431	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
				1432	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1433	else
				1434	#endif
				1435	lambda = 0.0;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1436	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1437	/* Not so sure about this test - if texcoord[3] is
				1438	* zero, we'd probably be fine except for an ASSERT in
				1439	* IROUND_POS() which gets triggered by the inf values created.
				1440	*/
				1441	if (texcoord[3] != 0.0) {
				1442	texcoord[0] /= texcoord[3];
				1443	texcoord[1] /= texcoord[3];
				1444	texcoord[2] /= texcoord[3];
				1445	}
				1446	machine->FetchTexelLod(ctx, texcoord, lambda,
				1447	inst->TexSrcUnit, color);
				1448	store_vector4( inst, machine, color );
				1449	}
				1450	break;
				1451	case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
				1452	/* Texture lookup w/ projective divide */
				1453	{
				1454	GLfloat texcoord[4], color[4], lambda;
				1455	#if 0
				1456	if (inst->SrcReg[0].File == PROGRAM_INPUT &&
				1457	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
				1458	lambda = span->array->lambda[inst->TexSrcUnit][column];
				1459	else
				1460	#endif
				1461	lambda = 0.0;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1462	fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1463	if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
				1464	texcoord[3] != 0.0) {
				1465	texcoord[0] /= texcoord[3];
				1466	texcoord[1] /= texcoord[3];
				1467	texcoord[2] /= texcoord[3];
				1468	}
				1469	machine->FetchTexelLod(ctx, texcoord, lambda,
				1470	inst->TexSrcUnit, color);
				1471	store_vector4( inst, machine, color );
				1472	}
				1473	break;
				1474	case OPCODE_UP2H: /* unpack two 16-bit floats */
				1475	{
				1476	GLfloat a[4], result[4];
				1477	const GLuint rawBits = (const GLuint ) a;
				1478	GLhalfNV hx, hy;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1479	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1480	hx = rawBits[0] & 0xffff;
				1481	hy = rawBits[0] >> 16;
				1482	result[0] = result[2] = _mesa_half_to_float(hx);
				1483	result[1] = result[3] = _mesa_half_to_float(hy);
				1484	store_vector4( inst, machine, result );
				1485	}
				1486	break;
				1487	case OPCODE_UP2US: /* unpack two GLushorts */
				1488	{
				1489	GLfloat a[4], result[4];
				1490	const GLuint rawBits = (const GLuint ) a;
				1491	GLushort usx, usy;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1492	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1493	usx = rawBits[0] & 0xffff;
				1494	usy = rawBits[0] >> 16;
				1495	result[0] = result[2] = usx * (1.0f / 65535.0f);
				1496	result[1] = result[3] = usy * (1.0f / 65535.0f);
				1497	store_vector4( inst, machine, result );
				1498	}
				1499	break;
				1500	case OPCODE_UP4B: /* unpack four GLbytes */
				1501	{
				1502	GLfloat a[4], result[4];
				1503	const GLuint rawBits = (const GLuint ) a;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1504	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1505	result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
				1506	result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
				1507	result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
				1508	result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
				1509	store_vector4( inst, machine, result );
				1510	}
				1511	break;
				1512	case OPCODE_UP4UB: /* unpack four GLubytes */
				1513	{
				1514	GLfloat a[4], result[4];
				1515	const GLuint rawBits = (const GLuint ) a;
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1516	fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1517	result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
				1518	result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
				1519	result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
				1520	result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
				1521	store_vector4( inst, machine, result );
				1522	}
				1523	break;
				1524	case OPCODE_XPD: /* cross product */
				1525	{
				1526	GLfloat a[4], b[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1527	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1528	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1529	result[0] = a[1] * b[2] - a[2] * b[1];
				1530	result[1] = a[2] * b[0] - a[0] * b[2];
				1531	result[2] = a[0] * b[1] - a[1] * b[0];
				1532	result[3] = 1.0;
				1533	store_vector4( inst, machine, result );
				1534	}
				1535	break;
				1536	case OPCODE_X2D: /* 2-D matrix transform */
				1537	{
				1538	GLfloat a[4], b[4], c[4], result[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1539	fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
				1540	fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
				1541	fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1542	result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
				1543	result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
				1544	result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
				1545	result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
				1546	store_vector4( inst, machine, result );
				1547	}
				1548	break;
				1549	case OPCODE_PRINT:
				1550	{
				1551	if (inst->SrcReg[0].File != -1) {
				1552	GLfloat a[4];
Brian	6df328e	2007-02-23 16:48:07 -0700	[diff] [blame^]	1553	fetch_vector4( ctx, &inst->SrcReg[0], machine, a);
Brian	13e3b21	2007-02-22 16:09:40 -0700	[diff] [blame]	1554	_mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
				1555	a[0], a[1], a[2], a[3]);
				1556	}
				1557	else {
				1558	_mesa_printf("%s\n", (const char *) inst->Data);
				1559	}
				1560	}
				1561	break;
				1562	case OPCODE_END:
				1563	return GL_TRUE;
				1564	default:
				1565	_mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
				1566	inst->Opcode);
				1567	return GL_TRUE; /* return value doesn't matter */
				1568
				1569	}
				1570	total++;
				1571	if (total > MAX_EXEC) {
				1572	_mesa_problem(ctx, "Infinite loop detected in fragment program");
				1573	return GL_TRUE;
				1574	abort();
				1575	}
				1576	}
				1577
				1578	#if FEATURE_MESA_program_debug
				1579	CurrentMachine = NULL;
				1580	#endif
				1581
				1582	return GL_TRUE;
				1583	}