/*
 * Mesa 3-D graphics library
 * Version:  6.5.3
 *
 * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24
/**
 * \file prog_execute.c
 * Software interpreter for vertex/fragment programs.
 * \author Brian Paul
 */
30
/*
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 */
36
37
38#include "glheader.h"
39#include "colormac.h"
40#include "context.h"
41#include "program.h"
42#include "prog_execute.h"
43#include "prog_instruction.h"
44#include "prog_parameter.h"
45#include "prog_print.h"
46#include "slang_library_noise.h"
47
48
49/* See comments below for info about this */
50#define LAMBDA_ZERO 1
51
52/* debug predicate */
53#define DEBUG_PROG 0
54
55
/**
 * Set x to positive or negative infinity.
 *
 * x must be a float lvalue.  Three variants exist:
 *  - USE_IEEE / _WIN32: the bit pattern 0x7F800000 (or 0xFF800000) is stored
 *    directly into x through a GLuint pointer (assumes GLuint and the float
 *    are both 32 bits wide -- TODO confirm for every supported target).
 *  - VMS: x is set to +/-__MAXFLOAT rather than a true infinity.
 *  - otherwise: x is set to +/-HUGE_VAL converted to GLfloat.
 *
 * Every expansion is a single fully parenthesized expression and the
 * argument is parenthesized, so the macros are safe inside larger
 * expressions and with arguments such as *p or a[i].
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Store the integer value 'bits' into the 'i' member of x, with x viewed
 * as an fi_type.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
71
72
73static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
74
75
76
Brian13e3b212007-02-22 16:09:40 -070077/**
78 * Return a pointer to the 4-element float vector specified by the given
79 * source register.
80 */
81static INLINE const GLfloat *
Brian33eac562007-02-25 18:52:41 -070082get_register_pointer(const struct prog_src_register *source,
Briane80d9012007-02-23 16:53:24 -070083 const struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -070084{
Brianf183a2d2007-02-23 17:14:30 -070085 if (source->RelAddr) {
86 const GLint reg = source->Index + machine->AddressReg[0][0];
Brianf183a2d2007-02-23 17:14:30 -070087 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
88 return ZeroVec;
89 else if (source->File == PROGRAM_ENV_PARAM)
Brian33eac562007-02-25 18:52:41 -070090 return machine->EnvParams[reg];
Brianf183a2d2007-02-23 17:14:30 -070091 else {
Brian761728a2007-02-24 11:14:57 -070092 ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
93 source->File == PROGRAM_STATE_VAR);
Brianf183a2d2007-02-23 17:14:30 -070094 return machine->CurProgram->Parameters->ParameterValues[reg];
95 }
96 }
97
Brian13e3b212007-02-22 16:09:40 -070098 switch (source->File) {
99 case PROGRAM_TEMPORARY:
100 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
101 return machine->Temporaries[source->Index];
102
103 case PROGRAM_INPUT:
104 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
105 ASSERT(source->Index < VERT_ATTRIB_MAX);
106 return machine->VertAttribs[source->Index];
107 }
108 else {
109 ASSERT(source->Index < FRAG_ATTRIB_MAX);
110 return machine->Attribs[source->Index][machine->CurElement];
111 }
112
113 case PROGRAM_OUTPUT:
Brian292a8042007-02-24 15:49:54 -0700114 ASSERT(source->Index < MAX_PROGRAM_OUTPUTS);
Brian13e3b212007-02-22 16:09:40 -0700115 return machine->Outputs[source->Index];
116
117 case PROGRAM_LOCAL_PARAM:
118 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
119 return machine->CurProgram->LocalParams[source->Index];
120
121 case PROGRAM_ENV_PARAM:
122 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
Brian33eac562007-02-25 18:52:41 -0700123 return machine->EnvParams[source->Index];
Brian13e3b212007-02-22 16:09:40 -0700124
125 case PROGRAM_STATE_VAR:
126 /* Fallthrough */
127 case PROGRAM_CONSTANT:
128 /* Fallthrough */
129 case PROGRAM_UNIFORM:
130 /* Fallthrough */
131 case PROGRAM_NAMED_PARAM:
132 ASSERT(source->Index <
133 (GLint) machine->CurProgram->Parameters->NumParameters);
134 return machine->CurProgram->Parameters->ParameterValues[source->Index];
135
136 default:
Brian33eac562007-02-25 18:52:41 -0700137 _mesa_problem(NULL,
Brian13e3b212007-02-22 16:09:40 -0700138 "Invalid input register file %d in get_register_pointer()",
139 source->File);
140 return NULL;
141 }
142}
143
144
#if FEATURE_MESA_program_debug
/**
 * Machine most recently handed to _mesa_execute_program(), which assigns
 * this pointer; NULL until then.
 */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetProgramRegister().
 *
 * \param ctx    rendering context (not used here)
 * \param file   register file to read from
 * \param index  register index within that file
 * \param val    receives the four floats; left untouched when no machine
 *               has been recorded yet or the register file is invalid
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   (void) ctx;

   if (CurrentMachine) {
      struct prog_src_register src;
      const GLfloat *reg;

      src.File = file;
      src.Index = index;
      /* get_register_pointer() branches on RelAddr, so it must not be left
       * uninitialized; debug queries always address a register directly.
       */
      src.RelAddr = 0;

      reg = get_register_pointer(&src, CurrentMachine);
      /* NULL is returned for an unreadable register file. */
      if (reg) {
         COPY_4V(val, reg);
      }
   }
}
#endif /* FEATURE_MESA_program_debug */
167
168
Brian13e3b212007-02-22 16:09:40 -0700169/**
170 * Fetch a 4-element float vector from the given source register.
171 * Apply swizzling and negating as needed.
172 */
173static void
Brian33eac562007-02-25 18:52:41 -0700174fetch_vector4(const struct prog_src_register *source,
Briane80d9012007-02-23 16:53:24 -0700175 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700176{
Brian33eac562007-02-25 18:52:41 -0700177 const GLfloat *src = get_register_pointer(source, machine);
Brian13e3b212007-02-22 16:09:40 -0700178 ASSERT(src);
179
180 if (source->Swizzle == SWIZZLE_NOOP) {
181 /* no swizzling */
182 COPY_4V(result, src);
183 }
184 else {
185 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
186 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
187 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
188 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
189 result[0] = src[GET_SWZ(source->Swizzle, 0)];
190 result[1] = src[GET_SWZ(source->Swizzle, 1)];
191 result[2] = src[GET_SWZ(source->Swizzle, 2)];
192 result[3] = src[GET_SWZ(source->Swizzle, 3)];
193 }
194
195 if (source->NegateBase) {
196 result[0] = -result[0];
197 result[1] = -result[1];
198 result[2] = -result[2];
199 result[3] = -result[3];
200 }
201 if (source->Abs) {
202 result[0] = FABSF(result[0]);
203 result[1] = FABSF(result[1]);
204 result[2] = FABSF(result[2]);
205 result[3] = FABSF(result[3]);
206 }
207 if (source->NegateAbs) {
208 result[0] = -result[0];
209 result[1] = -result[1];
210 result[2] = -result[2];
211 result[3] = -result[3];
212 }
213}
214
#if 0
/**
 * Compute the partial derivative, with respect to window X or Y, of the
 * fragment attribute selected by source->Index, then apply the source
 * register's swizzle and negate/abs modifiers.
 *
 * \param xOrY    'X' or 'Y', selecting which per-pixel step to use
 * \param column  pixel column within the span (only used for the X
 *                derivative of texcoords/varyings)
 * \param result  receives the four derivative components
 * \return GL_TRUE.  The original intent was to return GL_FALSE when another
 *         instance of the program has to be executed, but this
 *         implementation always succeeds.
 */
static GLboolean
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const SWspan * span,
                    char xOrY, GLint column, GLfloat result[4])
{
   GLfloat deriv[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      if (xOrY == 'X') {
         deriv[0] = 1.0;
         deriv[1] = 0.0;
         deriv[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         deriv[0] = 0.0;
         deriv[1] = 1.0;
         deriv[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
      break;

   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      /* color steps are scaled by 1/CHAN_MAXF */
      for (i = 0; i < 4; i++) {
         if (xOrY == 'X')
            deriv[i] = span->attrStepX[source->Index][i] * (1.0F / CHAN_MAXF);
         else
            deriv[i] = span->attrStepY[source->Index][i] * (1.0F / CHAN_MAXF);
      }
      break;

   case FRAG_ATTRIB_FOGC:
      /* only the first component carries a fog derivative */
      if (xOrY == 'X')
         deriv[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      else
         deriv[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      deriv[1] = 0.0;
      deriv[2] = 0.0;
      deriv[3] = 0.0;
      break;

   default:
      assert(source->Index < FRAG_ATTRIB_MAX);
      /* texcoord or varying */
      if (xOrY == 'X') {
         /* divide the step by Q evaluated at the requested column */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepX[source->Index][3]
                                      * column);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepX[source->Index][i] * invQ;
      }
      else {
         /* same idea, but Q is evaluated one step along Y */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepY[source->Index][3]);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepY[source->Index][i] * invQ;
      }
      break;
   }

   /* swizzle */
   for (i = 0; i < 4; i++) {
      result[i] = deriv[GET_SWZ(source->Swizzle, i)];
   }

   /* modifiers: negate, then abs, then negate again */
   if (source->NegateBase) {
      for (i = 0; i < 4; i++)
         result[i] = -result[i];
   }
   if (source->Abs) {
      for (i = 0; i < 4; i++)
         result[i] = FABSF(result[i]);
   }
   if (source->NegateAbs) {
      for (i = 0; i < 4; i++)
         result[i] = -result[i];
   }
   return GL_TRUE;
}
#endif
329
330
331/**
332 * As above, but only return result[0] element.
333 */
334static void
Brian33eac562007-02-25 18:52:41 -0700335fetch_vector1(const struct prog_src_register *source,
Briane80d9012007-02-23 16:53:24 -0700336 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700337{
Brian33eac562007-02-25 18:52:41 -0700338 const GLfloat *src = get_register_pointer(source, machine);
Brian13e3b212007-02-22 16:09:40 -0700339 ASSERT(src);
340
341 result[0] = src[GET_SWZ(source->Swizzle, 0)];
342
343 if (source->NegateBase) {
344 result[0] = -result[0];
345 }
346 if (source->Abs) {
347 result[0] = FABSF(result[0]);
348 }
349 if (source->NegateAbs) {
350 result[0] = -result[0];
351 }
352}
353
354
355/**
356 * Test value against zero and return GT, LT, EQ or UN if NaN.
357 */
358static INLINE GLuint
Briane80d9012007-02-23 16:53:24 -0700359generate_cc(float value)
Brian13e3b212007-02-22 16:09:40 -0700360{
361 if (value != value)
Briane80d9012007-02-23 16:53:24 -0700362 return COND_UN; /* NaN */
Brian13e3b212007-02-22 16:09:40 -0700363 if (value > 0.0F)
364 return COND_GT;
365 if (value < 0.0F)
366 return COND_LT;
367 return COND_EQ;
368}
369
370
371/**
372 * Test if the ccMaskRule is satisfied by the given condition code.
373 * Used to mask destination writes according to the current condition code.
374 */
375static INLINE GLboolean
376test_cc(GLuint condCode, GLuint ccMaskRule)
377{
378 switch (ccMaskRule) {
379 case COND_EQ: return (condCode == COND_EQ);
380 case COND_NE: return (condCode != COND_EQ);
381 case COND_LT: return (condCode == COND_LT);
382 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
383 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
384 case COND_GT: return (condCode == COND_GT);
385 case COND_TR: return GL_TRUE;
386 case COND_FL: return GL_FALSE;
387 default: return GL_TRUE;
388 }
389}
390
391
392/**
393 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
394 * or GL_FALSE to indicate result.
395 */
396static INLINE GLboolean
397eval_condition(const struct gl_program_machine *machine,
398 const struct prog_instruction *inst)
399{
400 const GLuint swizzle = inst->DstReg.CondSwizzle;
401 const GLuint condMask = inst->DstReg.CondMask;
402 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
403 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
404 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
405 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
406 return GL_TRUE;
407 }
408 else {
409 return GL_FALSE;
410 }
411}
412
413
414
415/**
416 * Store 4 floats into a register. Observe the instructions saturate and
417 * set-condition-code flags.
418 */
419static void
Briane80d9012007-02-23 16:53:24 -0700420store_vector4(const struct prog_instruction *inst,
421 struct gl_program_machine *machine, const GLfloat value[4])
Brian13e3b212007-02-22 16:09:40 -0700422{
423 const struct prog_dst_register *dest = &(inst->DstReg);
424 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
425 GLfloat *dstReg;
426 GLfloat dummyReg[4];
427 GLfloat clampedValue[4];
428 GLuint writeMask = dest->WriteMask;
429
430 switch (dest->File) {
Briane80d9012007-02-23 16:53:24 -0700431 case PROGRAM_OUTPUT:
Brian292a8042007-02-24 15:49:54 -0700432 ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
Briane80d9012007-02-23 16:53:24 -0700433 dstReg = machine->Outputs[dest->Index];
434 break;
435 case PROGRAM_TEMPORARY:
Brian292a8042007-02-24 15:49:54 -0700436 ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
Briane80d9012007-02-23 16:53:24 -0700437 dstReg = machine->Temporaries[dest->Index];
438 break;
439 case PROGRAM_WRITE_ONLY:
440 dstReg = dummyReg;
441 return;
442 default:
443 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
444 return;
Brian13e3b212007-02-22 16:09:40 -0700445 }
446
447#if 0
448 if (value[0] > 1.0e10 ||
449 IS_INF_OR_NAN(value[0]) ||
450 IS_INF_OR_NAN(value[1]) ||
Briane80d9012007-02-23 16:53:24 -0700451 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
Brian13e3b212007-02-22 16:09:40 -0700452 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
453#endif
454
455 if (clamp) {
456 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
457 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
458 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
459 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
460 value = clampedValue;
461 }
462
463 if (dest->CondMask != COND_TR) {
464 /* condition codes may turn off some writes */
465 if (writeMask & WRITEMASK_X) {
466 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
467 dest->CondMask))
468 writeMask &= ~WRITEMASK_X;
469 }
470 if (writeMask & WRITEMASK_Y) {
471 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
472 dest->CondMask))
473 writeMask &= ~WRITEMASK_Y;
474 }
475 if (writeMask & WRITEMASK_Z) {
476 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
477 dest->CondMask))
478 writeMask &= ~WRITEMASK_Z;
479 }
480 if (writeMask & WRITEMASK_W) {
481 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
482 dest->CondMask))
483 writeMask &= ~WRITEMASK_W;
484 }
485 }
486
487 if (writeMask & WRITEMASK_X)
488 dstReg[0] = value[0];
489 if (writeMask & WRITEMASK_Y)
490 dstReg[1] = value[1];
491 if (writeMask & WRITEMASK_Z)
492 dstReg[2] = value[2];
493 if (writeMask & WRITEMASK_W)
494 dstReg[3] = value[3];
495
496 if (inst->CondUpdate) {
497 if (writeMask & WRITEMASK_X)
498 machine->CondCodes[0] = generate_cc(value[0]);
499 if (writeMask & WRITEMASK_Y)
500 machine->CondCodes[1] = generate_cc(value[1]);
501 if (writeMask & WRITEMASK_Z)
502 machine->CondCodes[2] = generate_cc(value[2]);
503 if (writeMask & WRITEMASK_W)
504 machine->CondCodes[3] = generate_cc(value[3]);
505 }
506}
507
508
#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * All modifications are made through dMachine; the source machine is only
 * read.  NOTE(review): if Attribs is a pointer member rather than inline
 * storage, the copy still shares the attribute arrays with the source
 * machine -- confirm against the gl_program_machine declaration.
 *
 * \param machine   machine state to copy from
 * \param program   fragment program; its InputsRead mask selects which
 *                  attributes receive a derivative step
 * \param span      span providing the per-pixel attribute steps
 * \param xOrY      'X' or 'Y', selecting attrStepX or attrStepY
 * \param dMachine  receives the copied and adjusted machine state
 */
static void
init_machine_deriv(GLcontext * ctx,
                   const struct gl_program_machine *machine,
                   const struct gl_fragment_program *program,
                   const SWspan * span, char xOrY,
                   struct gl_program_machine *dMachine)
{
   GLuint attr;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero((void *) dMachine->Temporaries,
                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      if (xOrY == 'X') {
         wpos[0] += 1.0F;
         wpos[1] += 0.0F;
         wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         wpos[0] += 0.0F;
         wpos[1] += 1.0F;
         wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         if (xOrY == 'X') {
            col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
         }
         else {
            col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
         }
      }
   }
   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      if (xOrY == 'X') {
         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
      }
      else {
         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
      }
   }
   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         /* XXX perspective-correct interpolation */
         if (xOrY == 'X') {
            val[0] += span->attrStepX[attr][0];
            val[1] += span->attrStepX[attr][1];
            val[2] += span->attrStepX[attr][2];
            val[3] += span->attrStepX[attr][3];
         }
         else {
            val[0] += span->attrStepY[attr][0];
            val[1] += span->attrStepY[attr][1];
            val[2] += span->attrStepY[attr][2];
            val[3] += span->attrStepY[attr][3];
         }
      }
   }

   /* init condition codes */
   dMachine->CondCodes[0] = COND_EQ;
   dMachine->CondCodes[1] = COND_EQ;
   dMachine->CondCodes[2] = COND_EQ;
   dMachine->CondCodes[3] = COND_EQ;
}
#endif
607
608
609/**
610 * Execute the given vertex/fragment program.
611 *
612 * \param ctx - rendering context
613 * \param program - the fragment program to execute
614 * \param machine - machine state (register file)
Brian13e3b212007-02-22 16:09:40 -0700615 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
616 */
617GLboolean
Briane80d9012007-02-23 16:53:24 -0700618_mesa_execute_program(GLcontext * ctx,
Brian8b34b7d2007-02-25 18:26:50 -0700619 const struct gl_program *program,
Brian085d7d52007-02-25 18:23:37 -0700620 struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -0700621{
Brian8b34b7d2007-02-25 18:26:50 -0700622 const GLuint numInst = program->NumInstructions;
Briancfd00112007-02-25 18:30:45 -0700623 const GLuint maxExec = 10000;
624 GLint pc, numExec = 0;
Brian13e3b212007-02-22 16:09:40 -0700625
626 machine->CurProgram = program;
627
628 if (DEBUG_PROG) {
629 printf("execute program %u --------------------\n", program->Id);
630 }
631
632#if FEATURE_MESA_program_debug
633 CurrentMachine = machine;
634#endif
635
Brian33eac562007-02-25 18:52:41 -0700636 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
637 machine->EnvParams = ctx->VertexProgram.Parameters;
638 }
639 else {
640 machine->EnvParams = ctx->FragmentProgram.Parameters;
641 }
642
Brian8b34b7d2007-02-25 18:26:50 -0700643 for (pc = 0; pc < numInst; pc++) {
Brian13e3b212007-02-22 16:09:40 -0700644 const struct prog_instruction *inst = program->Instructions + pc;
645
646#if FEATURE_MESA_program_debug
647 if (ctx->FragmentProgram.CallbackEnabled &&
648 ctx->FragmentProgram.Callback) {
649 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
650 ctx->FragmentProgram.Callback(program->Target,
651 ctx->FragmentProgram.CallbackData);
652 }
653#endif
654
655 if (DEBUG_PROG) {
656 _mesa_print_instruction(inst);
657 }
658
659 switch (inst->Opcode) {
Briane80d9012007-02-23 16:53:24 -0700660 case OPCODE_ABS:
661 {
662 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700663 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700664 result[0] = FABSF(a[0]);
665 result[1] = FABSF(a[1]);
666 result[2] = FABSF(a[2]);
667 result[3] = FABSF(a[3]);
668 store_vector4(inst, machine, result);
669 }
670 break;
671 case OPCODE_ADD:
672 {
673 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700674 fetch_vector4(&inst->SrcReg[0], machine, a);
675 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700676 result[0] = a[0] + b[0];
677 result[1] = a[1] + b[1];
678 result[2] = a[2] + b[2];
679 result[3] = a[3] + b[3];
680 store_vector4(inst, machine, result);
681 if (DEBUG_PROG) {
682 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
683 result[0], result[1], result[2], result[3],
684 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700685 }
Briane80d9012007-02-23 16:53:24 -0700686 }
687 break;
Brianf183a2d2007-02-23 17:14:30 -0700688 case OPCODE_ARL:
689 {
690 GLfloat t[4];
Brian33eac562007-02-25 18:52:41 -0700691 fetch_vector4(&inst->SrcReg[0], machine, t);
Brianf183a2d2007-02-23 17:14:30 -0700692 machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
693 }
694 break;
Briane80d9012007-02-23 16:53:24 -0700695 case OPCODE_BGNLOOP:
696 /* no-op */
697 break;
698 case OPCODE_ENDLOOP:
699 /* subtract 1 here since pc is incremented by for(pc) loop */
700 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
701 break;
702 case OPCODE_BGNSUB: /* begin subroutine */
703 break;
704 case OPCODE_ENDSUB: /* end subroutine */
705 break;
706 case OPCODE_BRA: /* branch (conditional) */
707 /* fall-through */
708 case OPCODE_BRK: /* break out of loop (conditional) */
709 /* fall-through */
710 case OPCODE_CONT: /* continue loop (conditional) */
711 if (eval_condition(machine, inst)) {
712 /* take branch */
713 /* Subtract 1 here since we'll do pc++ at end of for-loop */
714 pc = inst->BranchTarget - 1;
715 }
716 break;
717 case OPCODE_CAL: /* Call subroutine (conditional) */
718 if (eval_condition(machine, inst)) {
719 /* call the subroutine */
720 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
721 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -0700722 }
Briane80d9012007-02-23 16:53:24 -0700723 machine->CallStack[machine->StackDepth++] = pc + 1;
724 pc = inst->BranchTarget; /* XXX - 1 ??? */
725 }
726 break;
727 case OPCODE_CMP:
728 {
729 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700730 fetch_vector4(&inst->SrcReg[0], machine, a);
731 fetch_vector4(&inst->SrcReg[1], machine, b);
732 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -0700733 result[0] = a[0] < 0.0F ? b[0] : c[0];
734 result[1] = a[1] < 0.0F ? b[1] : c[1];
735 result[2] = a[2] < 0.0F ? b[2] : c[2];
736 result[3] = a[3] < 0.0F ? b[3] : c[3];
737 store_vector4(inst, machine, result);
738 }
739 break;
740 case OPCODE_COS:
741 {
742 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700743 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700744 result[0] = result[1] = result[2] = result[3]
745 = (GLfloat) _mesa_cos(a[0]);
746 store_vector4(inst, machine, result);
747 }
748 break;
749 case OPCODE_DDX: /* Partial derivative with respect to X */
750 {
Brian13e3b212007-02-22 16:09:40 -0700751#if 0
Briane80d9012007-02-23 16:53:24 -0700752 GLfloat a[4], aNext[4], result[4];
753 struct gl_program_machine dMachine;
754 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
755 column, result)) {
756 /* This is tricky. Make a copy of the current machine state,
757 * increment the input registers by the dx or dy partial
758 * derivatives, then re-execute the program up to the
759 * preceeding instruction, then fetch the source register.
760 * Finally, find the difference in the register values for
761 * the original and derivative runs.
762 */
Brian33eac562007-02-25 18:52:41 -0700763 fetch_vector4(&inst->SrcReg[0], machine, program, a);
Briane80d9012007-02-23 16:53:24 -0700764 init_machine_deriv(ctx, machine, program, span,
765 'X', &dMachine);
766 execute_program(ctx, program, pc, &dMachine, span, column);
Brian33eac562007-02-25 18:52:41 -0700767 fetch_vector4(&inst->SrcReg[0], &dMachine, program,
Briane80d9012007-02-23 16:53:24 -0700768 aNext);
769 result[0] = aNext[0] - a[0];
770 result[1] = aNext[1] - a[1];
771 result[2] = aNext[2] - a[2];
772 result[3] = aNext[3] - a[3];
Brian13e3b212007-02-22 16:09:40 -0700773 }
Briane80d9012007-02-23 16:53:24 -0700774 store_vector4(inst, machine, result);
775#else
Brianf183a2d2007-02-23 17:14:30 -0700776 store_vector4(inst, machine, ZeroVec);
Briane80d9012007-02-23 16:53:24 -0700777#endif
778 }
779 break;
780 case OPCODE_DDY: /* Partial derivative with respect to Y */
781 {
Brian13e3b212007-02-22 16:09:40 -0700782#if 0
Briane80d9012007-02-23 16:53:24 -0700783 GLfloat a[4], aNext[4], result[4];
784 struct gl_program_machine dMachine;
785 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
786 column, result)) {
787 init_machine_deriv(ctx, machine, program, span,
788 'Y', &dMachine);
Brian33eac562007-02-25 18:52:41 -0700789 fetch_vector4(&inst->SrcReg[0], machine, program, a);
Briane80d9012007-02-23 16:53:24 -0700790 execute_program(ctx, program, pc, &dMachine, span, column);
Brian33eac562007-02-25 18:52:41 -0700791 fetch_vector4(&inst->SrcReg[0], &dMachine, program,
Briane80d9012007-02-23 16:53:24 -0700792 aNext);
793 result[0] = aNext[0] - a[0];
794 result[1] = aNext[1] - a[1];
795 result[2] = aNext[2] - a[2];
796 result[3] = aNext[3] - a[3];
797 }
798 store_vector4(inst, machine, result);
Brian13e3b212007-02-22 16:09:40 -0700799#else
Brianf183a2d2007-02-23 17:14:30 -0700800 store_vector4(inst, machine, ZeroVec);
Brian13e3b212007-02-22 16:09:40 -0700801#endif
Briane80d9012007-02-23 16:53:24 -0700802 }
803 break;
804 case OPCODE_DP3:
805 {
806 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700807 fetch_vector4(&inst->SrcReg[0], machine, a);
808 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700809 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
810 store_vector4(inst, machine, result);
811 if (DEBUG_PROG) {
812 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
813 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian13e3b212007-02-22 16:09:40 -0700814 }
Briane80d9012007-02-23 16:53:24 -0700815 }
816 break;
817 case OPCODE_DP4:
818 {
819 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700820 fetch_vector4(&inst->SrcReg[0], machine, a);
821 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700822 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
823 store_vector4(inst, machine, result);
824 if (DEBUG_PROG) {
825 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
826 result[0], a[0], a[1], a[2], a[3],
827 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700828 }
Briane80d9012007-02-23 16:53:24 -0700829 }
830 break;
831 case OPCODE_DPH:
832 {
833 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700834 fetch_vector4(&inst->SrcReg[0], machine, a);
835 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700836 result[0] = result[1] = result[2] = result[3] =
837 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
838 store_vector4(inst, machine, result);
839 }
840 break;
841 case OPCODE_DST: /* Distance vector */
842 {
843 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700844 fetch_vector4(&inst->SrcReg[0], machine, a);
845 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700846 result[0] = 1.0F;
847 result[1] = a[1] * b[1];
848 result[2] = a[2];
849 result[3] = b[3];
850 store_vector4(inst, machine, result);
851 }
852 break;
Brianf183a2d2007-02-23 17:14:30 -0700853 case OPCODE_EXP:
Brianf183a2d2007-02-23 17:14:30 -0700854 {
855 GLfloat t[4], q[4], floor_t0;
Brian33eac562007-02-25 18:52:41 -0700856 fetch_vector1(&inst->SrcReg[0], machine, t);
Brianf183a2d2007-02-23 17:14:30 -0700857 floor_t0 = FLOORF(t[0]);
858 if (floor_t0 > FLT_MAX_EXP) {
859 SET_POS_INFINITY(q[0]);
860 SET_POS_INFINITY(q[2]);
861 }
862 else if (floor_t0 < FLT_MIN_EXP) {
863 q[0] = 0.0F;
864 q[2] = 0.0F;
865 }
866 else {
Brian761728a2007-02-24 11:14:57 -0700867 q[0] = LDEXPF(1.0, (int) floor_t0);
868 /* Note: GL_NV_vertex_program expects
869 * result.z = result.x * APPX(result.y)
870 * We do what the ARB extension says.
871 */
872 q[2] = pow(2.0, t[0]);
Brianf183a2d2007-02-23 17:14:30 -0700873 }
874 q[1] = t[0] - floor_t0;
875 q[3] = 1.0F;
876 store_vector4( inst, machine, q );
877 }
878 break;
Briane80d9012007-02-23 16:53:24 -0700879 case OPCODE_EX2: /* Exponential base 2 */
880 {
881 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700882 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700883 result[0] = result[1] = result[2] = result[3] =
884 (GLfloat) _mesa_pow(2.0, a[0]);
885 store_vector4(inst, machine, result);
886 }
887 break;
888 case OPCODE_FLR:
889 {
890 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700891 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700892 result[0] = FLOORF(a[0]);
893 result[1] = FLOORF(a[1]);
894 result[2] = FLOORF(a[2]);
895 result[3] = FLOORF(a[3]);
896 store_vector4(inst, machine, result);
897 }
898 break;
899 case OPCODE_FRC:
900 {
901 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700902 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700903 result[0] = a[0] - FLOORF(a[0]);
904 result[1] = a[1] - FLOORF(a[1]);
905 result[2] = a[2] - FLOORF(a[2]);
906 result[3] = a[3] - FLOORF(a[3]);
907 store_vector4(inst, machine, result);
908 }
909 break;
910 case OPCODE_IF:
911 if (eval_condition(machine, inst)) {
912 /* do if-clause (just continue execution) */
913 }
914 else {
915 /* go to the instruction after ELSE or ENDIF */
Brian13e3b212007-02-22 16:09:40 -0700916 assert(inst->BranchTarget >= 0);
917 pc = inst->BranchTarget - 1;
Briane80d9012007-02-23 16:53:24 -0700918 }
919 break;
920 case OPCODE_ELSE:
921 /* goto ENDIF */
922 assert(inst->BranchTarget >= 0);
923 pc = inst->BranchTarget - 1;
924 break;
925 case OPCODE_ENDIF:
926 /* nothing */
927 break;
928 case OPCODE_INT: /* float to int */
929 {
930 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700931 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700932 result[0] = (GLfloat) (GLint) a[0];
933 result[1] = (GLfloat) (GLint) a[1];
934 result[2] = (GLfloat) (GLint) a[2];
935 result[3] = (GLfloat) (GLint) a[3];
936 store_vector4(inst, machine, result);
937 }
938 break;
939 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
940 if (eval_condition(machine, inst)) {
941 return GL_FALSE;
942 }
943 break;
944 case OPCODE_KIL: /* ARB_f_p only */
945 {
946 GLfloat a[4];
Brian33eac562007-02-25 18:52:41 -0700947 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700948 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
Brian13e3b212007-02-22 16:09:40 -0700949 return GL_FALSE;
950 }
Briane80d9012007-02-23 16:53:24 -0700951 }
952 break;
953 case OPCODE_LG2: /* log base 2 */
954 {
955 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700956 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700957 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
958 store_vector4(inst, machine, result);
959 }
960 break;
961 case OPCODE_LIT:
962 {
963 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
964 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700965 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700966 a[0] = MAX2(a[0], 0.0F);
967 a[1] = MAX2(a[1], 0.0F);
968 /* XXX ARB version clamps a[3], NV version doesn't */
969 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
970 result[0] = 1.0F;
971 result[1] = a[0];
972 /* XXX we could probably just use pow() here */
973 if (a[0] > 0.0F) {
974 if (a[1] == 0.0 && a[3] == 0.0)
975 result[2] = 1.0;
976 else
977 result[2] = EXPF(a[3] * LOGF(a[1]));
Brian13e3b212007-02-22 16:09:40 -0700978 }
Briane80d9012007-02-23 16:53:24 -0700979 else {
980 result[2] = 0.0;
Brian13e3b212007-02-22 16:09:40 -0700981 }
Briane80d9012007-02-23 16:53:24 -0700982 result[3] = 1.0F;
983 store_vector4(inst, machine, result);
984 if (DEBUG_PROG) {
985 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
986 result[0], result[1], result[2], result[3],
987 a[0], a[1], a[2], a[3]);
Brian13e3b212007-02-22 16:09:40 -0700988 }
Briane80d9012007-02-23 16:53:24 -0700989 }
990 break;
Brianf183a2d2007-02-23 17:14:30 -0700991 case OPCODE_LOG:
992 {
993 GLfloat t[4], q[4], abs_t0;
Brian33eac562007-02-25 18:52:41 -0700994 fetch_vector1(&inst->SrcReg[0], machine, t);
Brianf183a2d2007-02-23 17:14:30 -0700995 abs_t0 = FABSF(t[0]);
996 if (abs_t0 != 0.0F) {
997 /* Since we really can't handle infinite values on VMS
998 * like other OSes we'll use __MAXFLOAT to represent
999 * infinity. This may need some tweaking.
1000 */
1001#ifdef VMS
1002 if (abs_t0 == __MAXFLOAT)
1003#else
1004 if (IS_INF_OR_NAN(abs_t0))
1005#endif
1006 {
1007 SET_POS_INFINITY(q[0]);
1008 q[1] = 1.0F;
1009 SET_POS_INFINITY(q[2]);
1010 }
1011 else {
1012 int exponent;
1013 GLfloat mantissa = FREXPF(t[0], &exponent);
1014 q[0] = (GLfloat) (exponent - 1);
1015 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1016 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1017 }
1018 }
1019 else {
1020 SET_NEG_INFINITY(q[0]);
1021 q[1] = 1.0F;
1022 SET_NEG_INFINITY(q[2]);
1023 }
1024 q[3] = 1.0;
1025 store_vector4(inst, machine, q);
1026 }
1027 break;
Briane80d9012007-02-23 16:53:24 -07001028 case OPCODE_LRP:
1029 {
1030 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001031 fetch_vector4(&inst->SrcReg[0], machine, a);
1032 fetch_vector4(&inst->SrcReg[1], machine, b);
1033 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001034 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1035 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1036 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1037 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1038 store_vector4(inst, machine, result);
1039 if (DEBUG_PROG) {
1040 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1041 "(%g %g %g %g), (%g %g %g %g)\n",
1042 result[0], result[1], result[2], result[3],
1043 a[0], a[1], a[2], a[3],
1044 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001045 }
Briane80d9012007-02-23 16:53:24 -07001046 }
1047 break;
1048 case OPCODE_MAD:
1049 {
1050 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001051 fetch_vector4(&inst->SrcReg[0], machine, a);
1052 fetch_vector4(&inst->SrcReg[1], machine, b);
1053 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001054 result[0] = a[0] * b[0] + c[0];
1055 result[1] = a[1] * b[1] + c[1];
1056 result[2] = a[2] * b[2] + c[2];
1057 result[3] = a[3] * b[3] + c[3];
1058 store_vector4(inst, machine, result);
1059 if (DEBUG_PROG) {
1060 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1061 "(%g %g %g %g) + (%g %g %g %g)\n",
1062 result[0], result[1], result[2], result[3],
1063 a[0], a[1], a[2], a[3],
1064 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001065 }
Briane80d9012007-02-23 16:53:24 -07001066 }
1067 break;
1068 case OPCODE_MAX:
1069 {
1070 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001071 fetch_vector4(&inst->SrcReg[0], machine, a);
1072 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001073 result[0] = MAX2(a[0], b[0]);
1074 result[1] = MAX2(a[1], b[1]);
1075 result[2] = MAX2(a[2], b[2]);
1076 result[3] = MAX2(a[3], b[3]);
1077 store_vector4(inst, machine, result);
1078 if (DEBUG_PROG) {
1079 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1080 result[0], result[1], result[2], result[3],
1081 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001082 }
Briane80d9012007-02-23 16:53:24 -07001083 }
1084 break;
1085 case OPCODE_MIN:
1086 {
1087 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001088 fetch_vector4(&inst->SrcReg[0], machine, a);
1089 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001090 result[0] = MIN2(a[0], b[0]);
1091 result[1] = MIN2(a[1], b[1]);
1092 result[2] = MIN2(a[2], b[2]);
1093 result[3] = MIN2(a[3], b[3]);
1094 store_vector4(inst, machine, result);
1095 }
1096 break;
1097 case OPCODE_MOV:
1098 {
1099 GLfloat result[4];
Brian33eac562007-02-25 18:52:41 -07001100 fetch_vector4(&inst->SrcReg[0], machine, result);
Briane80d9012007-02-23 16:53:24 -07001101 store_vector4(inst, machine, result);
1102 if (DEBUG_PROG) {
1103 printf("MOV (%g %g %g %g)\n",
1104 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001105 }
Briane80d9012007-02-23 16:53:24 -07001106 }
1107 break;
1108 case OPCODE_MUL:
1109 {
1110 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001111 fetch_vector4(&inst->SrcReg[0], machine, a);
1112 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001113 result[0] = a[0] * b[0];
1114 result[1] = a[1] * b[1];
1115 result[2] = a[2] * b[2];
1116 result[3] = a[3] * b[3];
1117 store_vector4(inst, machine, result);
1118 if (DEBUG_PROG) {
1119 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1120 result[0], result[1], result[2], result[3],
1121 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001122 }
Briane80d9012007-02-23 16:53:24 -07001123 }
1124 break;
1125 case OPCODE_NOISE1:
1126 {
1127 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001128 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001129 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001130 result[1] =
Briane80d9012007-02-23 16:53:24 -07001131 result[2] = result[3] = _slang_library_noise1(a[0]);
1132 store_vector4(inst, machine, result);
1133 }
1134 break;
1135 case OPCODE_NOISE2:
1136 {
1137 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001138 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001139 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001140 result[1] =
Briane80d9012007-02-23 16:53:24 -07001141 result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1142 store_vector4(inst, machine, result);
1143 }
1144 break;
1145 case OPCODE_NOISE3:
1146 {
1147 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001148 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001149 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001150 result[1] =
1151 result[2] =
1152 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Briane80d9012007-02-23 16:53:24 -07001153 store_vector4(inst, machine, result);
1154 }
1155 break;
1156 case OPCODE_NOISE4:
1157 {
1158 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001159 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001160 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001161 result[1] =
1162 result[2] =
1163 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Briane80d9012007-02-23 16:53:24 -07001164 store_vector4(inst, machine, result);
1165 }
1166 break;
1167 case OPCODE_NOP:
1168 break;
1169 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1170 {
1171 GLfloat a[4], result[4];
1172 GLhalfNV hx, hy;
1173 GLuint *rawResult = (GLuint *) result;
1174 GLuint twoHalves;
Brian33eac562007-02-25 18:52:41 -07001175 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001176 hx = _mesa_float_to_half(a[0]);
1177 hy = _mesa_float_to_half(a[1]);
1178 twoHalves = hx | (hy << 16);
1179 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1180 = twoHalves;
1181 store_vector4(inst, machine, result);
1182 }
1183 break;
1184 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1185 {
1186 GLfloat a[4], result[4];
1187 GLuint usx, usy, *rawResult = (GLuint *) result;
Brian33eac562007-02-25 18:52:41 -07001188 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001189 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1190 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1191 usx = IROUND(a[0] * 65535.0F);
1192 usy = IROUND(a[1] * 65535.0F);
1193 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1194 = usx | (usy << 16);
1195 store_vector4(inst, machine, result);
1196 }
1197 break;
1198 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1199 {
1200 GLfloat a[4], result[4];
1201 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
Brian33eac562007-02-25 18:52:41 -07001202 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001203 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1204 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1205 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1206 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1207 ubx = IROUND(127.0F * a[0] + 128.0F);
1208 uby = IROUND(127.0F * a[1] + 128.0F);
1209 ubz = IROUND(127.0F * a[2] + 128.0F);
1210 ubw = IROUND(127.0F * a[3] + 128.0F);
1211 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1212 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1213 store_vector4(inst, machine, result);
1214 }
1215 break;
1216 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1217 {
1218 GLfloat a[4], result[4];
1219 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
Brian33eac562007-02-25 18:52:41 -07001220 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001221 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1222 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1223 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1224 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1225 ubx = IROUND(255.0F * a[0]);
1226 uby = IROUND(255.0F * a[1]);
1227 ubz = IROUND(255.0F * a[2]);
1228 ubw = IROUND(255.0F * a[3]);
1229 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1230 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1231 store_vector4(inst, machine, result);
1232 }
1233 break;
1234 case OPCODE_POW:
1235 {
1236 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001237 fetch_vector1(&inst->SrcReg[0], machine, a);
1238 fetch_vector1(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001239 result[0] = result[1] = result[2] = result[3]
1240 = (GLfloat) _mesa_pow(a[0], b[0]);
1241 store_vector4(inst, machine, result);
1242 }
1243 break;
1244 case OPCODE_RCP:
1245 {
1246 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001247 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001248 if (DEBUG_PROG) {
1249 if (a[0] == 0)
1250 printf("RCP(0)\n");
1251 else if (IS_INF_OR_NAN(a[0]))
1252 printf("RCP(inf)\n");
Brian13e3b212007-02-22 16:09:40 -07001253 }
Briane80d9012007-02-23 16:53:24 -07001254 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1255 store_vector4(inst, machine, result);
1256 }
1257 break;
1258 case OPCODE_RET: /* return from subroutine (conditional) */
1259 if (eval_condition(machine, inst)) {
1260 if (machine->StackDepth == 0) {
1261 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -07001262 }
Briane80d9012007-02-23 16:53:24 -07001263 pc = machine->CallStack[--machine->StackDepth];
1264 }
1265 break;
1266 case OPCODE_RFL: /* reflection vector */
1267 {
1268 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
Brian33eac562007-02-25 18:52:41 -07001269 fetch_vector4(&inst->SrcReg[0], machine, axis);
1270 fetch_vector4(&inst->SrcReg[1], machine, dir);
Briane80d9012007-02-23 16:53:24 -07001271 tmpW = DOT3(axis, axis);
1272 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1273 result[0] = tmpX * axis[0] - dir[0];
1274 result[1] = tmpX * axis[1] - dir[1];
1275 result[2] = tmpX * axis[2] - dir[2];
1276 /* result[3] is never written! XXX enforce in parser! */
1277 store_vector4(inst, machine, result);
1278 }
1279 break;
1280 case OPCODE_RSQ: /* 1 / sqrt() */
1281 {
1282 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001283 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001284 a[0] = FABSF(a[0]);
1285 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1286 store_vector4(inst, machine, result);
1287 if (DEBUG_PROG) {
1288 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
Brian13e3b212007-02-22 16:09:40 -07001289 }
Briane80d9012007-02-23 16:53:24 -07001290 }
1291 break;
1292 case OPCODE_SCS: /* sine and cos */
1293 {
1294 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001295 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001296 result[0] = (GLfloat) _mesa_cos(a[0]);
1297 result[1] = (GLfloat) _mesa_sin(a[0]);
1298 result[2] = 0.0; /* undefined! */
1299 result[3] = 0.0; /* undefined! */
1300 store_vector4(inst, machine, result);
1301 }
1302 break;
1303 case OPCODE_SEQ: /* set on equal */
1304 {
1305 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001306 fetch_vector4(&inst->SrcReg[0], machine, a);
1307 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001308 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1309 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1310 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1311 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1312 store_vector4(inst, machine, result);
1313 }
1314 break;
1315 case OPCODE_SFL: /* set false, operands ignored */
1316 {
1317 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1318 store_vector4(inst, machine, result);
1319 }
1320 break;
1321 case OPCODE_SGE: /* set on greater or equal */
1322 {
1323 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001324 fetch_vector4(&inst->SrcReg[0], machine, a);
1325 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001326 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1327 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1328 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1329 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1330 store_vector4(inst, machine, result);
1331 }
1332 break;
1333 case OPCODE_SGT: /* set on greater */
1334 {
1335 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001336 fetch_vector4(&inst->SrcReg[0], machine, a);
1337 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001338 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1339 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1340 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1341 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1342 store_vector4(inst, machine, result);
1343 if (DEBUG_PROG) {
1344 printf("SGT %g %g %g %g\n",
1345 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001346 }
Briane80d9012007-02-23 16:53:24 -07001347 }
1348 break;
1349 case OPCODE_SIN:
1350 {
1351 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001352 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001353 result[0] = result[1] = result[2] = result[3]
1354 = (GLfloat) _mesa_sin(a[0]);
1355 store_vector4(inst, machine, result);
1356 }
1357 break;
1358 case OPCODE_SLE: /* set on less or equal */
1359 {
1360 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001361 fetch_vector4(&inst->SrcReg[0], machine, a);
1362 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001363 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1364 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1365 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1366 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1367 store_vector4(inst, machine, result);
1368 }
1369 break;
1370 case OPCODE_SLT: /* set on less */
1371 {
1372 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001373 fetch_vector4(&inst->SrcReg[0], machine, a);
1374 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001375 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1376 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1377 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1378 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1379 store_vector4(inst, machine, result);
1380 }
1381 break;
1382 case OPCODE_SNE: /* set on not equal */
1383 {
1384 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001385 fetch_vector4(&inst->SrcReg[0], machine, a);
1386 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001387 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1388 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1389 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1390 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1391 store_vector4(inst, machine, result);
1392 }
1393 break;
1394 case OPCODE_STR: /* set true, operands ignored */
1395 {
1396 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1397 store_vector4(inst, machine, result);
1398 }
1399 break;
1400 case OPCODE_SUB:
1401 {
1402 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001403 fetch_vector4(&inst->SrcReg[0], machine, a);
1404 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001405 result[0] = a[0] - b[0];
1406 result[1] = a[1] - b[1];
1407 result[2] = a[2] - b[2];
1408 result[3] = a[3] - b[3];
1409 store_vector4(inst, machine, result);
1410 if (DEBUG_PROG) {
1411 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1412 result[0], result[1], result[2], result[3],
1413 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001414 }
Briane80d9012007-02-23 16:53:24 -07001415 }
1416 break;
1417 case OPCODE_SWZ: /* extended swizzle */
1418 {
1419 const struct prog_src_register *source = &inst->SrcReg[0];
Brian33eac562007-02-25 18:52:41 -07001420 const GLfloat *src = get_register_pointer(source, machine);
Briane80d9012007-02-23 16:53:24 -07001421 GLfloat result[4];
1422 GLuint i;
1423 for (i = 0; i < 4; i++) {
1424 const GLuint swz = GET_SWZ(source->Swizzle, i);
1425 if (swz == SWIZZLE_ZERO)
1426 result[i] = 0.0;
1427 else if (swz == SWIZZLE_ONE)
1428 result[i] = 1.0;
Brian13e3b212007-02-22 16:09:40 -07001429 else {
Briane80d9012007-02-23 16:53:24 -07001430 ASSERT(swz >= 0);
1431 ASSERT(swz <= 3);
1432 result[i] = src[swz];
Brian13e3b212007-02-22 16:09:40 -07001433 }
Briane80d9012007-02-23 16:53:24 -07001434 if (source->NegateBase & (1 << i))
1435 result[i] = -result[i];
Brian13e3b212007-02-22 16:09:40 -07001436 }
Briane80d9012007-02-23 16:53:24 -07001437 store_vector4(inst, machine, result);
1438 }
1439 break;
1440 case OPCODE_TEX: /* Both ARB and NV frag prog */
1441 /* Texel lookup */
1442 {
1443 /* Note: only use the precomputed lambda value when we're
1444 * sampling texture unit [K] with texcoord[K].
1445 * Otherwise, the lambda value may have no relation to the
1446 * instruction's texcoord or texture image. Using the wrong
1447 * lambda is usually bad news.
1448 * The rest of the time, just use zero (until we get a more
1449 * sophisticated way of computing lambda).
1450 */
1451 GLfloat coord[4], color[4], lambda;
1452#if 0
1453 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1454 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1455 lambda = span->array->lambda[inst->TexSrcUnit][column];
1456 else
1457#endif
1458 lambda = 0.0;
Brian33eac562007-02-25 18:52:41 -07001459 fetch_vector4(&inst->SrcReg[0], machine, coord);
Briane80d9012007-02-23 16:53:24 -07001460 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
1461 color);
1462 if (DEBUG_PROG) {
1463 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1464 "lod %f\n",
1465 color[0], color[1], color[2], color[3],
1466 inst->TexSrcUnit,
1467 coord[0], coord[1], coord[2], coord[3], lambda);
1468 }
1469 store_vector4(inst, machine, color);
1470 }
1471 break;
1472 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1473 /* Texel lookup with LOD bias */
1474 {
1475 const struct gl_texture_unit *texUnit
1476 = &ctx->Texture.Unit[inst->TexSrcUnit];
1477 GLfloat coord[4], color[4], lambda, bias;
1478#if 0
1479 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1480 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1481 lambda = span->array->lambda[inst->TexSrcUnit][column];
1482 else
1483#endif
1484 lambda = 0.0;
Brian33eac562007-02-25 18:52:41 -07001485 fetch_vector4(&inst->SrcReg[0], machine, coord);
Briane80d9012007-02-23 16:53:24 -07001486 /* coord[3] is the bias to add to lambda */
1487 bias = texUnit->LodBias + coord[3];
1488 if (texUnit->_Current)
1489 bias += texUnit->_Current->LodBias;
1490 machine->FetchTexelLod(ctx, coord, lambda + bias,
1491 inst->TexSrcUnit, color);
1492 store_vector4(inst, machine, color);
1493 }
1494 break;
1495 case OPCODE_TXD: /* GL_NV_fragment_program only */
1496 /* Texture lookup w/ partial derivatives for LOD */
1497 {
1498 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
Brian33eac562007-02-25 18:52:41 -07001499 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1500 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1501 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
Briane80d9012007-02-23 16:53:24 -07001502 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1503 inst->TexSrcUnit, color);
1504 store_vector4(inst, machine, color);
1505 }
1506 break;
1507 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1508 /* Texture lookup w/ projective divide */
1509 {
1510 GLfloat texcoord[4], color[4], lambda;
1511#if 0
1512 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1513 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1514 lambda = span->array->lambda[inst->TexSrcUnit][column];
1515 else
1516#endif
1517 lambda = 0.0;
Brian33eac562007-02-25 18:52:41 -07001518 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
Briane80d9012007-02-23 16:53:24 -07001519 /* Not so sure about this test - if texcoord[3] is
1520 * zero, we'd probably be fine except for an ASSERT in
1521 * IROUND_POS() which gets triggered by the inf values created.
1522 */
1523 if (texcoord[3] != 0.0) {
1524 texcoord[0] /= texcoord[3];
1525 texcoord[1] /= texcoord[3];
1526 texcoord[2] /= texcoord[3];
1527 }
1528 machine->FetchTexelLod(ctx, texcoord, lambda,
1529 inst->TexSrcUnit, color);
1530 store_vector4(inst, machine, color);
1531 }
1532 break;
1533 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1534 /* Texture lookup w/ projective divide */
1535 {
1536 GLfloat texcoord[4], color[4], lambda;
1537#if 0
1538 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1539 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1540 lambda = span->array->lambda[inst->TexSrcUnit][column];
1541 else
1542#endif
1543 lambda = 0.0;
Brian33eac562007-02-25 18:52:41 -07001544 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
Briane80d9012007-02-23 16:53:24 -07001545 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1546 texcoord[3] != 0.0) {
1547 texcoord[0] /= texcoord[3];
1548 texcoord[1] /= texcoord[3];
1549 texcoord[2] /= texcoord[3];
1550 }
1551 machine->FetchTexelLod(ctx, texcoord, lambda,
1552 inst->TexSrcUnit, color);
1553 store_vector4(inst, machine, color);
1554 }
1555 break;
1556 case OPCODE_UP2H: /* unpack two 16-bit floats */
1557 {
1558 GLfloat a[4], result[4];
1559 const GLuint *rawBits = (const GLuint *) a;
1560 GLhalfNV hx, hy;
Brian33eac562007-02-25 18:52:41 -07001561 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001562 hx = rawBits[0] & 0xffff;
1563 hy = rawBits[0] >> 16;
1564 result[0] = result[2] = _mesa_half_to_float(hx);
1565 result[1] = result[3] = _mesa_half_to_float(hy);
1566 store_vector4(inst, machine, result);
1567 }
1568 break;
1569 case OPCODE_UP2US: /* unpack two GLushorts */
1570 {
1571 GLfloat a[4], result[4];
1572 const GLuint *rawBits = (const GLuint *) a;
1573 GLushort usx, usy;
Brian33eac562007-02-25 18:52:41 -07001574 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001575 usx = rawBits[0] & 0xffff;
1576 usy = rawBits[0] >> 16;
1577 result[0] = result[2] = usx * (1.0f / 65535.0f);
1578 result[1] = result[3] = usy * (1.0f / 65535.0f);
1579 store_vector4(inst, machine, result);
1580 }
1581 break;
1582 case OPCODE_UP4B: /* unpack four GLbytes */
1583 {
1584 GLfloat a[4], result[4];
1585 const GLuint *rawBits = (const GLuint *) a;
Brian33eac562007-02-25 18:52:41 -07001586 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001587 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1588 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1589 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1590 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1591 store_vector4(inst, machine, result);
1592 }
1593 break;
1594 case OPCODE_UP4UB: /* unpack four GLubytes */
1595 {
1596 GLfloat a[4], result[4];
1597 const GLuint *rawBits = (const GLuint *) a;
Brian33eac562007-02-25 18:52:41 -07001598 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001599 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1600 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1601 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1602 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1603 store_vector4(inst, machine, result);
1604 }
1605 break;
1606 case OPCODE_XPD: /* cross product */
1607 {
1608 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001609 fetch_vector4(&inst->SrcReg[0], machine, a);
1610 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001611 result[0] = a[1] * b[2] - a[2] * b[1];
1612 result[1] = a[2] * b[0] - a[0] * b[2];
1613 result[2] = a[0] * b[1] - a[1] * b[0];
1614 result[3] = 1.0;
1615 store_vector4(inst, machine, result);
1616 }
1617 break;
1618 case OPCODE_X2D: /* 2-D matrix transform */
1619 {
1620 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001621 fetch_vector4(&inst->SrcReg[0], machine, a);
1622 fetch_vector4(&inst->SrcReg[1], machine, b);
1623 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001624 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1625 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1626 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1627 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1628 store_vector4(inst, machine, result);
1629 }
1630 break;
1631 case OPCODE_PRINT:
1632 {
1633 if (inst->SrcReg[0].File != -1) {
1634 GLfloat a[4];
Brian33eac562007-02-25 18:52:41 -07001635 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001636 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1637 a[0], a[1], a[2], a[3]);
1638 }
1639 else {
1640 _mesa_printf("%s\n", (const char *) inst->Data);
1641 }
1642 }
1643 break;
1644 case OPCODE_END:
1645 return GL_TRUE;
1646 default:
1647 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1648 inst->Opcode);
1649 return GL_TRUE; /* return value doesn't matter */
Brian13e3b212007-02-22 16:09:40 -07001650
1651 }
Briane80d9012007-02-23 16:53:24 -07001652
Briancfd00112007-02-25 18:30:45 -07001653 numExec++;
1654 if (numExec > maxExec) {
Brian13e3b212007-02-22 16:09:40 -07001655 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1656 return GL_TRUE;
Brian13e3b212007-02-22 16:09:40 -07001657 }
Briane80d9012007-02-23 16:53:24 -07001658
1659 } /* for pc */
Brian13e3b212007-02-22 16:09:40 -07001660
1661#if FEATURE_MESA_program_debug
1662 CurrentMachine = NULL;
1663#endif
1664
1665 return GL_TRUE;
1666}