blob: 91ea52070e3ef5e6f1b6aac33ca4ac51c0134969 [file] [log] [blame]
Brian13e3b212007-02-22 16:09:40 -07001/*
2 * Mesa 3-D graphics library
3 * Version: 6.5.3
4 *
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31/*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38#include "glheader.h"
39#include "colormac.h"
40#include "context.h"
41#include "program.h"
42#include "prog_execute.h"
43#include "prog_instruction.h"
44#include "prog_parameter.h"
45#include "prog_print.h"
46#include "slang_library_noise.h"
47
48
49/* See comments below for info about this */
50#define LAMBDA_ZERO 1
51
52/* debug predicate */
53#define DEBUG_PROG 0
54
55
/**
 * Set x to positive or negative infinity.
 *
 * x must be a GLfloat lvalue.  Every variant expands to one fully
 * parenthesized assignment expression, so the macros are safe to use
 * either as a statement or as a sub-expression (e.g. an arm of ?:).
 */
#if defined(USE_IEEE) || defined(_WIN32)
/* Write the single-precision bit pattern for +/-infinity directly. */
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
/* __MAXFLOAT is used to represent infinity on VMS. */
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Overwrite the storage of the float lvalue x with the integer bit
 * pattern 'bits', going through the 'i' member of fi_type.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
71
72
73static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
74
75
76
#if FEATURE_MESA_program_debug
/** Machine being interpreted; assigned by _mesa_execute_program(). */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context, used for error reporting
 * \param file   register file to read: PROGRAM_INPUT, PROGRAM_OUTPUT or
 *               PROGRAM_TEMPORARY
 * \param index  register index within that file (not range-checked here)
 * \param val    receives the four register components; left untouched
 *               when no machine is current or \p file is unsupported
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (!CurrentMachine) {
      /* no program is being executed: nothing to report */
      return;
   }

   switch (file) {
   case PROGRAM_INPUT:
      /* vertex programs and fragment programs keep inputs in different
       * arrays; fragment inputs are additionally indexed by element.
       */
      if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
         COPY_4V(val, CurrentMachine->VertAttribs[index]);
      }
      else {
         COPY_4V(val,
                 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
      }
      break;
   case PROGRAM_OUTPUT:
      COPY_4V(val, CurrentMachine->Outputs[index]);
      break;
   case PROGRAM_TEMPORARY:
      COPY_4V(val, CurrentMachine->Temporaries[index]);
      break;
   default:
      _mesa_problem(ctx,
                    "bad register file in _mesa_get_program_register");
   }
}
#endif /* FEATURE_MESA_program_debug */
113
114
115
116/**
117 * Return a pointer to the 4-element float vector specified by the given
118 * source register.
119 */
120static INLINE const GLfloat *
Briane80d9012007-02-23 16:53:24 -0700121get_register_pointer(GLcontext * ctx,
122 const struct prog_src_register *source,
123 const struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -0700124{
125 /* XXX relative addressing... */
Brianf183a2d2007-02-23 17:14:30 -0700126
127 if (source->RelAddr) {
128 const GLint reg = source->Index + machine->AddressReg[0][0];
129 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
130 (source->File == PROGRAM_STATE_VAR) );
131 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
132 return ZeroVec;
133 else if (source->File == PROGRAM_ENV_PARAM)
134 return ctx->VertexProgram.Parameters[reg];
135 else {
136 /*
137 ASSERT(source->File == PROGRAM_LOCAL_PARAM);
138 */
139 return machine->CurProgram->Parameters->ParameterValues[reg];
140 }
141 }
142
143
Brian13e3b212007-02-22 16:09:40 -0700144 switch (source->File) {
145 case PROGRAM_TEMPORARY:
146 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
147 return machine->Temporaries[source->Index];
148
149 case PROGRAM_INPUT:
150 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
151 ASSERT(source->Index < VERT_ATTRIB_MAX);
152 return machine->VertAttribs[source->Index];
153 }
154 else {
155 ASSERT(source->Index < FRAG_ATTRIB_MAX);
156 return machine->Attribs[source->Index][machine->CurElement];
157 }
158
159 case PROGRAM_OUTPUT:
160 /* This is only for PRINT */
161 ASSERT(source->Index < FRAG_RESULT_MAX);
162 return machine->Outputs[source->Index];
163
164 case PROGRAM_LOCAL_PARAM:
165 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
166 return machine->CurProgram->LocalParams[source->Index];
167
168 case PROGRAM_ENV_PARAM:
169 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
170 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
171 return ctx->VertexProgram.Parameters[source->Index];
172 else
173 return ctx->FragmentProgram.Parameters[source->Index];
174
175 case PROGRAM_STATE_VAR:
176 /* Fallthrough */
177 case PROGRAM_CONSTANT:
178 /* Fallthrough */
179 case PROGRAM_UNIFORM:
180 /* Fallthrough */
181 case PROGRAM_NAMED_PARAM:
182 ASSERT(source->Index <
183 (GLint) machine->CurProgram->Parameters->NumParameters);
184 return machine->CurProgram->Parameters->ParameterValues[source->Index];
185
186 default:
187 _mesa_problem(ctx,
188 "Invalid input register file %d in get_register_pointer()",
189 source->File);
190 return NULL;
191 }
192}
193
194
195/**
196 * Fetch a 4-element float vector from the given source register.
197 * Apply swizzling and negating as needed.
198 */
199static void
Briane80d9012007-02-23 16:53:24 -0700200fetch_vector4(GLcontext * ctx,
201 const struct prog_src_register *source,
202 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700203{
204 const GLfloat *src = get_register_pointer(ctx, source, machine);
205 ASSERT(src);
206
207 if (source->Swizzle == SWIZZLE_NOOP) {
208 /* no swizzling */
209 COPY_4V(result, src);
210 }
211 else {
212 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
213 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
214 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
215 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
216 result[0] = src[GET_SWZ(source->Swizzle, 0)];
217 result[1] = src[GET_SWZ(source->Swizzle, 1)];
218 result[2] = src[GET_SWZ(source->Swizzle, 2)];
219 result[3] = src[GET_SWZ(source->Swizzle, 3)];
220 }
221
222 if (source->NegateBase) {
223 result[0] = -result[0];
224 result[1] = -result[1];
225 result[2] = -result[2];
226 result[3] = -result[3];
227 }
228 if (source->Abs) {
229 result[0] = FABSF(result[0]);
230 result[1] = FABSF(result[1]);
231 result[2] = FABSF(result[2]);
232 result[3] = FABSF(result[3]);
233 }
234 if (source->NegateAbs) {
235 result[0] = -result[0];
236 result[1] = -result[1];
237 result[2] = -result[2];
238 result[3] = -result[3];
239 }
240}
241
#if 0
/**
 * Compute the partial derivative of a fragment input register with
 * respect to window X or Y, from the span's per-attribute step values.
 * The swizzle and negate/abs modifiers of the operand are applied to the
 * derivative just as they would be to the value itself.
 * (This function is currently compiled out.)
 *
 * \param ctx     rendering context
 * \param source  source operand; its Index selects the fragment attribute
 * \param span    span supplying attrStart / attrStepX / attrStepY
 * \param xOrY    'X' or 'Y', the direction of differentiation
 * \param column  pixel column within the span (used for X derivatives of
 *                texcoords/varyings)
 * \param result  receives the four derivative components
 * \return GL_TRUE if it was easily computed or GL_FALSE if we
 *         need to execute another instance of the program (ugh)!
 *         As written, GL_TRUE is always returned.
 */
static GLboolean
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const SWspan * span,
                    char xOrY, GLint column, GLfloat result[4])
{
   const GLboolean alongX = (xOrY == 'X');
   GLfloat deriv[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      /* window position: unit step in the chosen direction, Z scaled
       * by the depth buffer maximum
       */
      if (alongX) {
         deriv[0] = 1.0;
         deriv[1] = 0.0;
         deriv[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         deriv[0] = 0.0;
         deriv[1] = 1.0;
         deriv[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
      break;

   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      /* colors: steps are scaled by 1/CHAN_MAXF */
      if (alongX) {
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepX[source->Index][i] * (1.0F / CHAN_MAXF);
      }
      else {
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepY[source->Index][i] * (1.0F / CHAN_MAXF);
      }
      break;

   case FRAG_ATTRIB_FOGC:
      /* fog: only the first component carries data */
      if (alongX)
         deriv[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      else
         deriv[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      deriv[1] = 0.0;
      deriv[2] = 0.0;
      deriv[3] = 0.0;
      break;

   default:
      assert(source->Index < FRAG_ATTRIB_MAX);
      /* texcoord or varying */
      if (alongX) {
         /* this is a little tricky - I think I've got it right */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepX[source->Index][3]
                                      * column);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepX[source->Index][i] * invQ;
      }
      else {
         /* Tricky, as above, but in Y direction */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepY[source->Index][3]);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepY[source->Index][i] * invQ;
      }
      break;
   }

   /* swizzle, then base negate, abs, negate-after-abs per component */
   for (i = 0; i < 4; i++) {
      GLfloat v = deriv[GET_SWZ(source->Swizzle, i)];
      if (source->NegateBase)
         v = -v;
      if (source->Abs)
         v = FABSF(v);
      if (source->NegateAbs)
         v = -v;
      result[i] = v;
   }

   return GL_TRUE;
}
#endif
356
357
358/**
359 * As above, but only return result[0] element.
360 */
361static void
Briane80d9012007-02-23 16:53:24 -0700362fetch_vector1(GLcontext * ctx,
363 const struct prog_src_register *source,
364 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700365{
366 const GLfloat *src = get_register_pointer(ctx, source, machine);
367 ASSERT(src);
368
369 result[0] = src[GET_SWZ(source->Swizzle, 0)];
370
371 if (source->NegateBase) {
372 result[0] = -result[0];
373 }
374 if (source->Abs) {
375 result[0] = FABSF(result[0]);
376 }
377 if (source->NegateAbs) {
378 result[0] = -result[0];
379 }
380}
381
382
383/**
384 * Test value against zero and return GT, LT, EQ or UN if NaN.
385 */
386static INLINE GLuint
Briane80d9012007-02-23 16:53:24 -0700387generate_cc(float value)
Brian13e3b212007-02-22 16:09:40 -0700388{
389 if (value != value)
Briane80d9012007-02-23 16:53:24 -0700390 return COND_UN; /* NaN */
Brian13e3b212007-02-22 16:09:40 -0700391 if (value > 0.0F)
392 return COND_GT;
393 if (value < 0.0F)
394 return COND_LT;
395 return COND_EQ;
396}
397
398
399/**
400 * Test if the ccMaskRule is satisfied by the given condition code.
401 * Used to mask destination writes according to the current condition code.
402 */
403static INLINE GLboolean
404test_cc(GLuint condCode, GLuint ccMaskRule)
405{
406 switch (ccMaskRule) {
407 case COND_EQ: return (condCode == COND_EQ);
408 case COND_NE: return (condCode != COND_EQ);
409 case COND_LT: return (condCode == COND_LT);
410 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
411 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
412 case COND_GT: return (condCode == COND_GT);
413 case COND_TR: return GL_TRUE;
414 case COND_FL: return GL_FALSE;
415 default: return GL_TRUE;
416 }
417}
418
419
420/**
421 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
422 * or GL_FALSE to indicate result.
423 */
424static INLINE GLboolean
425eval_condition(const struct gl_program_machine *machine,
426 const struct prog_instruction *inst)
427{
428 const GLuint swizzle = inst->DstReg.CondSwizzle;
429 const GLuint condMask = inst->DstReg.CondMask;
430 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
431 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
432 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
433 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
434 return GL_TRUE;
435 }
436 else {
437 return GL_FALSE;
438 }
439}
440
441
442
443/**
444 * Store 4 floats into a register. Observe the instructions saturate and
445 * set-condition-code flags.
446 */
447static void
Briane80d9012007-02-23 16:53:24 -0700448store_vector4(const struct prog_instruction *inst,
449 struct gl_program_machine *machine, const GLfloat value[4])
Brian13e3b212007-02-22 16:09:40 -0700450{
451 const struct prog_dst_register *dest = &(inst->DstReg);
452 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
453 GLfloat *dstReg;
454 GLfloat dummyReg[4];
455 GLfloat clampedValue[4];
456 GLuint writeMask = dest->WriteMask;
457
458 switch (dest->File) {
Briane80d9012007-02-23 16:53:24 -0700459 case PROGRAM_OUTPUT:
460 dstReg = machine->Outputs[dest->Index];
461 break;
462 case PROGRAM_TEMPORARY:
463 dstReg = machine->Temporaries[dest->Index];
464 break;
465 case PROGRAM_WRITE_ONLY:
466 dstReg = dummyReg;
467 return;
468 default:
469 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
470 return;
Brian13e3b212007-02-22 16:09:40 -0700471 }
472
473#if 0
474 if (value[0] > 1.0e10 ||
475 IS_INF_OR_NAN(value[0]) ||
476 IS_INF_OR_NAN(value[1]) ||
Briane80d9012007-02-23 16:53:24 -0700477 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
Brian13e3b212007-02-22 16:09:40 -0700478 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
479#endif
480
481 if (clamp) {
482 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
483 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
484 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
485 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
486 value = clampedValue;
487 }
488
489 if (dest->CondMask != COND_TR) {
490 /* condition codes may turn off some writes */
491 if (writeMask & WRITEMASK_X) {
492 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
493 dest->CondMask))
494 writeMask &= ~WRITEMASK_X;
495 }
496 if (writeMask & WRITEMASK_Y) {
497 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
498 dest->CondMask))
499 writeMask &= ~WRITEMASK_Y;
500 }
501 if (writeMask & WRITEMASK_Z) {
502 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
503 dest->CondMask))
504 writeMask &= ~WRITEMASK_Z;
505 }
506 if (writeMask & WRITEMASK_W) {
507 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
508 dest->CondMask))
509 writeMask &= ~WRITEMASK_W;
510 }
511 }
512
513 if (writeMask & WRITEMASK_X)
514 dstReg[0] = value[0];
515 if (writeMask & WRITEMASK_Y)
516 dstReg[1] = value[1];
517 if (writeMask & WRITEMASK_Z)
518 dstReg[2] = value[2];
519 if (writeMask & WRITEMASK_W)
520 dstReg[3] = value[3];
521
522 if (inst->CondUpdate) {
523 if (writeMask & WRITEMASK_X)
524 machine->CondCodes[0] = generate_cc(value[0]);
525 if (writeMask & WRITEMASK_Y)
526 machine->CondCodes[1] = generate_cc(value[1]);
527 if (writeMask & WRITEMASK_Z)
528 machine->CondCodes[2] = generate_cc(value[2]);
529 if (writeMask & WRITEMASK_W)
530 machine->CondCodes[3] = generate_cc(value[3]);
531 }
532}
533
534
#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 * (This function is currently compiled out.)
 *
 * All modifications are made to \p dMachine; \p machine is only read.
 *
 * \param ctx       rendering context
 * \param machine   machine state to copy from
 * \param program   fragment program being executed; its InputsRead mask
 *                  selects which inputs get a derivative added
 * \param span      span supplying the attrStepX / attrStepY values
 * \param xOrY      'X' or 'Y', the direction of the derivative
 * \param dMachine  receives the copied-and-offset machine state
 */
static void
init_machine_deriv(GLcontext * ctx,
                   const struct gl_program_machine *machine,
                   const struct gl_fragment_program *program,
                   const SWspan * span, char xOrY,
                   struct gl_program_machine *dMachine)
{
   GLuint attr;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero((void *) dMachine->Temporaries,
                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      if (xOrY == 'X') {
         wpos[0] += 1.0F;
         wpos[1] += 0.0F;
         wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         wpos[0] += 0.0F;
         wpos[1] += 1.0F;
         wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         if (xOrY == 'X') {
            col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
         }
         else {
            col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
            col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
            col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
            col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
         }
      }
   }

   /* fog: only the first component is stepped */
   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      if (xOrY == 'X') {
         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
      }
      else {
         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
      }
   }

   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         /* XXX perspective-correct interpolation */
         if (xOrY == 'X') {
            val[0] += span->attrStepX[attr][0];
            val[1] += span->attrStepX[attr][1];
            val[2] += span->attrStepX[attr][2];
            val[3] += span->attrStepX[attr][3];
         }
         else {
            val[0] += span->attrStepY[attr][0];
            val[1] += span->attrStepY[attr][1];
            val[2] += span->attrStepY[attr][2];
            val[3] += span->attrStepY[attr][3];
         }
      }
   }

   /* init condition codes */
   dMachine->CondCodes[0] = COND_EQ;
   dMachine->CondCodes[1] = COND_EQ;
   dMachine->CondCodes[2] = COND_EQ;
   dMachine->CondCodes[3] = COND_EQ;
}
#endif
633
634
635/**
636 * Execute the given vertex/fragment program.
637 *
638 * \param ctx - rendering context
639 * \param program - the fragment program to execute
640 * \param machine - machine state (register file)
641 * \param maxInst - max number of instructions to execute
642 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
643 */
644GLboolean
Briane80d9012007-02-23 16:53:24 -0700645_mesa_execute_program(GLcontext * ctx,
Brian13e3b212007-02-22 16:09:40 -0700646 const struct gl_program *program, GLuint maxInst,
647 struct gl_program_machine *machine, GLuint element)
648{
649 const GLuint MAX_EXEC = 10000;
650 GLint pc, total = 0;
651
652 machine->CurProgram = program;
653
654 if (DEBUG_PROG) {
655 printf("execute program %u --------------------\n", program->Id);
656 }
657
658#if FEATURE_MESA_program_debug
659 CurrentMachine = machine;
660#endif
661
662 for (pc = 0; pc < maxInst; pc++) {
663 const struct prog_instruction *inst = program->Instructions + pc;
664
665#if FEATURE_MESA_program_debug
666 if (ctx->FragmentProgram.CallbackEnabled &&
667 ctx->FragmentProgram.Callback) {
668 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
669 ctx->FragmentProgram.Callback(program->Target,
670 ctx->FragmentProgram.CallbackData);
671 }
672#endif
673
674 if (DEBUG_PROG) {
675 _mesa_print_instruction(inst);
676 }
677
678 switch (inst->Opcode) {
Briane80d9012007-02-23 16:53:24 -0700679 case OPCODE_ABS:
680 {
681 GLfloat a[4], result[4];
682 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
683 result[0] = FABSF(a[0]);
684 result[1] = FABSF(a[1]);
685 result[2] = FABSF(a[2]);
686 result[3] = FABSF(a[3]);
687 store_vector4(inst, machine, result);
688 }
689 break;
690 case OPCODE_ADD:
691 {
692 GLfloat a[4], b[4], result[4];
693 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
694 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
695 result[0] = a[0] + b[0];
696 result[1] = a[1] + b[1];
697 result[2] = a[2] + b[2];
698 result[3] = a[3] + b[3];
699 store_vector4(inst, machine, result);
700 if (DEBUG_PROG) {
701 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
702 result[0], result[1], result[2], result[3],
703 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700704 }
Briane80d9012007-02-23 16:53:24 -0700705 }
706 break;
Brianf183a2d2007-02-23 17:14:30 -0700707 case OPCODE_ARL:
708 {
709 GLfloat t[4];
710 fetch_vector4(ctx, &inst->SrcReg[0], machine, t);
711 machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
712 }
713 break;
Briane80d9012007-02-23 16:53:24 -0700714 case OPCODE_BGNLOOP:
715 /* no-op */
716 break;
717 case OPCODE_ENDLOOP:
718 /* subtract 1 here since pc is incremented by for(pc) loop */
719 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
720 break;
721 case OPCODE_BGNSUB: /* begin subroutine */
722 break;
723 case OPCODE_ENDSUB: /* end subroutine */
724 break;
725 case OPCODE_BRA: /* branch (conditional) */
726 /* fall-through */
727 case OPCODE_BRK: /* break out of loop (conditional) */
728 /* fall-through */
729 case OPCODE_CONT: /* continue loop (conditional) */
730 if (eval_condition(machine, inst)) {
731 /* take branch */
732 /* Subtract 1 here since we'll do pc++ at end of for-loop */
733 pc = inst->BranchTarget - 1;
734 }
735 break;
736 case OPCODE_CAL: /* Call subroutine (conditional) */
737 if (eval_condition(machine, inst)) {
738 /* call the subroutine */
739 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
740 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -0700741 }
Briane80d9012007-02-23 16:53:24 -0700742 machine->CallStack[machine->StackDepth++] = pc + 1;
743 pc = inst->BranchTarget; /* XXX - 1 ??? */
744 }
745 break;
746 case OPCODE_CMP:
747 {
748 GLfloat a[4], b[4], c[4], result[4];
749 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
750 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
751 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
752 result[0] = a[0] < 0.0F ? b[0] : c[0];
753 result[1] = a[1] < 0.0F ? b[1] : c[1];
754 result[2] = a[2] < 0.0F ? b[2] : c[2];
755 result[3] = a[3] < 0.0F ? b[3] : c[3];
756 store_vector4(inst, machine, result);
757 }
758 break;
759 case OPCODE_COS:
760 {
761 GLfloat a[4], result[4];
762 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
763 result[0] = result[1] = result[2] = result[3]
764 = (GLfloat) _mesa_cos(a[0]);
765 store_vector4(inst, machine, result);
766 }
767 break;
768 case OPCODE_DDX: /* Partial derivative with respect to X */
769 {
Brian13e3b212007-02-22 16:09:40 -0700770#if 0
Briane80d9012007-02-23 16:53:24 -0700771 GLfloat a[4], aNext[4], result[4];
772 struct gl_program_machine dMachine;
773 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
774 column, result)) {
775 /* This is tricky. Make a copy of the current machine state,
776 * increment the input registers by the dx or dy partial
777 * derivatives, then re-execute the program up to the
778 * preceeding instruction, then fetch the source register.
779 * Finally, find the difference in the register values for
780 * the original and derivative runs.
781 */
782 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
783 init_machine_deriv(ctx, machine, program, span,
784 'X', &dMachine);
785 execute_program(ctx, program, pc, &dMachine, span, column);
786 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
787 aNext);
788 result[0] = aNext[0] - a[0];
789 result[1] = aNext[1] - a[1];
790 result[2] = aNext[2] - a[2];
791 result[3] = aNext[3] - a[3];
Brian13e3b212007-02-22 16:09:40 -0700792 }
Briane80d9012007-02-23 16:53:24 -0700793 store_vector4(inst, machine, result);
794#else
Brianf183a2d2007-02-23 17:14:30 -0700795 store_vector4(inst, machine, ZeroVec);
Briane80d9012007-02-23 16:53:24 -0700796#endif
797 }
798 break;
799 case OPCODE_DDY: /* Partial derivative with respect to Y */
800 {
Brian13e3b212007-02-22 16:09:40 -0700801#if 0
Briane80d9012007-02-23 16:53:24 -0700802 GLfloat a[4], aNext[4], result[4];
803 struct gl_program_machine dMachine;
804 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
805 column, result)) {
806 init_machine_deriv(ctx, machine, program, span,
807 'Y', &dMachine);
808 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
809 execute_program(ctx, program, pc, &dMachine, span, column);
810 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
811 aNext);
812 result[0] = aNext[0] - a[0];
813 result[1] = aNext[1] - a[1];
814 result[2] = aNext[2] - a[2];
815 result[3] = aNext[3] - a[3];
816 }
817 store_vector4(inst, machine, result);
Brian13e3b212007-02-22 16:09:40 -0700818#else
Brianf183a2d2007-02-23 17:14:30 -0700819 store_vector4(inst, machine, ZeroVec);
Brian13e3b212007-02-22 16:09:40 -0700820#endif
Briane80d9012007-02-23 16:53:24 -0700821 }
822 break;
823 case OPCODE_DP3:
824 {
825 GLfloat a[4], b[4], result[4];
826 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
827 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
828 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
829 store_vector4(inst, machine, result);
830 if (DEBUG_PROG) {
831 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
832 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian13e3b212007-02-22 16:09:40 -0700833 }
Briane80d9012007-02-23 16:53:24 -0700834 }
835 break;
836 case OPCODE_DP4:
837 {
838 GLfloat a[4], b[4], result[4];
839 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
840 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
841 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
842 store_vector4(inst, machine, result);
843 if (DEBUG_PROG) {
844 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
845 result[0], a[0], a[1], a[2], a[3],
846 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700847 }
Briane80d9012007-02-23 16:53:24 -0700848 }
849 break;
850 case OPCODE_DPH:
851 {
852 GLfloat a[4], b[4], result[4];
853 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
854 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
855 result[0] = result[1] = result[2] = result[3] =
856 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
857 store_vector4(inst, machine, result);
858 }
859 break;
860 case OPCODE_DST: /* Distance vector */
861 {
862 GLfloat a[4], b[4], result[4];
863 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
864 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
865 result[0] = 1.0F;
866 result[1] = a[1] * b[1];
867 result[2] = a[2];
868 result[3] = b[3];
869 store_vector4(inst, machine, result);
870 }
871 break;
Brianf183a2d2007-02-23 17:14:30 -0700872 case OPCODE_EXP:
873 /* XXX currently broken! */
874 {
875 GLfloat t[4], q[4], floor_t0;
876 fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
877 floor_t0 = FLOORF(t[0]);
878 if (floor_t0 > FLT_MAX_EXP) {
879 SET_POS_INFINITY(q[0]);
880 SET_POS_INFINITY(q[2]);
881 }
882 else if (floor_t0 < FLT_MIN_EXP) {
883 q[0] = 0.0F;
884 q[2] = 0.0F;
885 }
886 else {
887#ifdef USE_IEEE
888 GLint ii = (GLint) floor_t0;
889 ii = (ii < 23) + 0x3f800000;
890 SET_FLOAT_BITS(q[0], ii);
891 q[0] = *((GLfloat *) (void *)&ii);
892#else
893 q[0] = (GLfloat) pow(2.0, floor_t0);
894#endif
895 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
896 }
897 q[1] = t[0] - floor_t0;
898 q[3] = 1.0F;
899 store_vector4( inst, machine, q );
900 }
901 break;
Briane80d9012007-02-23 16:53:24 -0700902 case OPCODE_EX2: /* Exponential base 2 */
903 {
904 GLfloat a[4], result[4];
905 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
906 result[0] = result[1] = result[2] = result[3] =
907 (GLfloat) _mesa_pow(2.0, a[0]);
908 store_vector4(inst, machine, result);
909 }
910 break;
911 case OPCODE_FLR:
912 {
913 GLfloat a[4], result[4];
914 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
915 result[0] = FLOORF(a[0]);
916 result[1] = FLOORF(a[1]);
917 result[2] = FLOORF(a[2]);
918 result[3] = FLOORF(a[3]);
919 store_vector4(inst, machine, result);
920 }
921 break;
922 case OPCODE_FRC:
923 {
924 GLfloat a[4], result[4];
925 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
926 result[0] = a[0] - FLOORF(a[0]);
927 result[1] = a[1] - FLOORF(a[1]);
928 result[2] = a[2] - FLOORF(a[2]);
929 result[3] = a[3] - FLOORF(a[3]);
930 store_vector4(inst, machine, result);
931 }
932 break;
933 case OPCODE_IF:
934 if (eval_condition(machine, inst)) {
935 /* do if-clause (just continue execution) */
936 }
937 else {
938 /* go to the instruction after ELSE or ENDIF */
Brian13e3b212007-02-22 16:09:40 -0700939 assert(inst->BranchTarget >= 0);
940 pc = inst->BranchTarget - 1;
Briane80d9012007-02-23 16:53:24 -0700941 }
942 break;
943 case OPCODE_ELSE:
944 /* goto ENDIF */
945 assert(inst->BranchTarget >= 0);
946 pc = inst->BranchTarget - 1;
947 break;
948 case OPCODE_ENDIF:
949 /* nothing */
950 break;
951 case OPCODE_INT: /* float to int */
952 {
953 GLfloat a[4], result[4];
954 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
955 result[0] = (GLfloat) (GLint) a[0];
956 result[1] = (GLfloat) (GLint) a[1];
957 result[2] = (GLfloat) (GLint) a[2];
958 result[3] = (GLfloat) (GLint) a[3];
959 store_vector4(inst, machine, result);
960 }
961 break;
962 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
963 if (eval_condition(machine, inst)) {
964 return GL_FALSE;
965 }
966 break;
967 case OPCODE_KIL: /* ARB_f_p only */
968 {
969 GLfloat a[4];
970 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
971 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
Brian13e3b212007-02-22 16:09:40 -0700972 return GL_FALSE;
973 }
Briane80d9012007-02-23 16:53:24 -0700974 }
975 break;
976 case OPCODE_LG2: /* log base 2 */
977 {
978 GLfloat a[4], result[4];
979 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
980 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
981 store_vector4(inst, machine, result);
982 }
983 break;
984 case OPCODE_LIT:
985 {
986 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
987 GLfloat a[4], result[4];
988 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
989 a[0] = MAX2(a[0], 0.0F);
990 a[1] = MAX2(a[1], 0.0F);
991 /* XXX ARB version clamps a[3], NV version doesn't */
992 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
993 result[0] = 1.0F;
994 result[1] = a[0];
995 /* XXX we could probably just use pow() here */
996 if (a[0] > 0.0F) {
997 if (a[1] == 0.0 && a[3] == 0.0)
998 result[2] = 1.0;
999 else
1000 result[2] = EXPF(a[3] * LOGF(a[1]));
Brian13e3b212007-02-22 16:09:40 -07001001 }
Briane80d9012007-02-23 16:53:24 -07001002 else {
1003 result[2] = 0.0;
Brian13e3b212007-02-22 16:09:40 -07001004 }
Briane80d9012007-02-23 16:53:24 -07001005 result[3] = 1.0F;
1006 store_vector4(inst, machine, result);
1007 if (DEBUG_PROG) {
1008 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1009 result[0], result[1], result[2], result[3],
1010 a[0], a[1], a[2], a[3]);
Brian13e3b212007-02-22 16:09:40 -07001011 }
Briane80d9012007-02-23 16:53:24 -07001012 }
1013 break;
Brianf183a2d2007-02-23 17:14:30 -07001014 case OPCODE_LOG:
1015 {
1016 GLfloat t[4], q[4], abs_t0;
1017 fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
1018 abs_t0 = FABSF(t[0]);
1019 if (abs_t0 != 0.0F) {
1020 /* Since we really can't handle infinite values on VMS
1021 * like other OSes we'll use __MAXFLOAT to represent
1022 * infinity. This may need some tweaking.
1023 */
1024#ifdef VMS
1025 if (abs_t0 == __MAXFLOAT)
1026#else
1027 if (IS_INF_OR_NAN(abs_t0))
1028#endif
1029 {
1030 SET_POS_INFINITY(q[0]);
1031 q[1] = 1.0F;
1032 SET_POS_INFINITY(q[2]);
1033 }
1034 else {
1035 int exponent;
1036 GLfloat mantissa = FREXPF(t[0], &exponent);
1037 q[0] = (GLfloat) (exponent - 1);
1038 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1039 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1040 }
1041 }
1042 else {
1043 SET_NEG_INFINITY(q[0]);
1044 q[1] = 1.0F;
1045 SET_NEG_INFINITY(q[2]);
1046 }
1047 q[3] = 1.0;
1048 store_vector4(inst, machine, q);
1049 }
1050 break;
Briane80d9012007-02-23 16:53:24 -07001051 case OPCODE_LRP:
1052 {
1053 GLfloat a[4], b[4], c[4], result[4];
1054 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1055 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1056 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1057 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1058 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1059 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1060 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1061 store_vector4(inst, machine, result);
1062 if (DEBUG_PROG) {
1063 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1064 "(%g %g %g %g), (%g %g %g %g)\n",
1065 result[0], result[1], result[2], result[3],
1066 a[0], a[1], a[2], a[3],
1067 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001068 }
Briane80d9012007-02-23 16:53:24 -07001069 }
1070 break;
1071 case OPCODE_MAD:
1072 {
1073 GLfloat a[4], b[4], c[4], result[4];
1074 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1075 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1076 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1077 result[0] = a[0] * b[0] + c[0];
1078 result[1] = a[1] * b[1] + c[1];
1079 result[2] = a[2] * b[2] + c[2];
1080 result[3] = a[3] * b[3] + c[3];
1081 store_vector4(inst, machine, result);
1082 if (DEBUG_PROG) {
1083 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1084 "(%g %g %g %g) + (%g %g %g %g)\n",
1085 result[0], result[1], result[2], result[3],
1086 a[0], a[1], a[2], a[3],
1087 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001088 }
Briane80d9012007-02-23 16:53:24 -07001089 }
1090 break;
1091 case OPCODE_MAX:
1092 {
1093 GLfloat a[4], b[4], result[4];
1094 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1095 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1096 result[0] = MAX2(a[0], b[0]);
1097 result[1] = MAX2(a[1], b[1]);
1098 result[2] = MAX2(a[2], b[2]);
1099 result[3] = MAX2(a[3], b[3]);
1100 store_vector4(inst, machine, result);
1101 if (DEBUG_PROG) {
1102 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1103 result[0], result[1], result[2], result[3],
1104 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001105 }
Briane80d9012007-02-23 16:53:24 -07001106 }
1107 break;
1108 case OPCODE_MIN:
1109 {
1110 GLfloat a[4], b[4], result[4];
1111 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1112 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1113 result[0] = MIN2(a[0], b[0]);
1114 result[1] = MIN2(a[1], b[1]);
1115 result[2] = MIN2(a[2], b[2]);
1116 result[3] = MIN2(a[3], b[3]);
1117 store_vector4(inst, machine, result);
1118 }
1119 break;
1120 case OPCODE_MOV:
1121 {
1122 GLfloat result[4];
1123 fetch_vector4(ctx, &inst->SrcReg[0], machine, result);
1124 store_vector4(inst, machine, result);
1125 if (DEBUG_PROG) {
1126 printf("MOV (%g %g %g %g)\n",
1127 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001128 }
Briane80d9012007-02-23 16:53:24 -07001129 }
1130 break;
1131 case OPCODE_MUL:
1132 {
1133 GLfloat a[4], b[4], result[4];
1134 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1135 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1136 result[0] = a[0] * b[0];
1137 result[1] = a[1] * b[1];
1138 result[2] = a[2] * b[2];
1139 result[3] = a[3] * b[3];
1140 store_vector4(inst, machine, result);
1141 if (DEBUG_PROG) {
1142 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1143 result[0], result[1], result[2], result[3],
1144 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001145 }
Briane80d9012007-02-23 16:53:24 -07001146 }
1147 break;
1148 case OPCODE_NOISE1:
1149 {
1150 GLfloat a[4], result[4];
1151 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1152 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001153 result[1] =
Briane80d9012007-02-23 16:53:24 -07001154 result[2] = result[3] = _slang_library_noise1(a[0]);
1155 store_vector4(inst, machine, result);
1156 }
1157 break;
1158 case OPCODE_NOISE2:
1159 {
1160 GLfloat a[4], result[4];
1161 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1162 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001163 result[1] =
Briane80d9012007-02-23 16:53:24 -07001164 result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1165 store_vector4(inst, machine, result);
1166 }
1167 break;
1168 case OPCODE_NOISE3:
1169 {
1170 GLfloat a[4], result[4];
1171 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1172 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001173 result[1] =
1174 result[2] =
1175 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Briane80d9012007-02-23 16:53:24 -07001176 store_vector4(inst, machine, result);
1177 }
1178 break;
1179 case OPCODE_NOISE4:
1180 {
1181 GLfloat a[4], result[4];
1182 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1183 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001184 result[1] =
1185 result[2] =
1186 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Briane80d9012007-02-23 16:53:24 -07001187 store_vector4(inst, machine, result);
1188 }
1189 break;
1190 case OPCODE_NOP:
1191 break;
1192 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1193 {
1194 GLfloat a[4], result[4];
1195 GLhalfNV hx, hy;
1196 GLuint *rawResult = (GLuint *) result;
1197 GLuint twoHalves;
1198 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1199 hx = _mesa_float_to_half(a[0]);
1200 hy = _mesa_float_to_half(a[1]);
1201 twoHalves = hx | (hy << 16);
1202 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1203 = twoHalves;
1204 store_vector4(inst, machine, result);
1205 }
1206 break;
1207 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1208 {
1209 GLfloat a[4], result[4];
1210 GLuint usx, usy, *rawResult = (GLuint *) result;
1211 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1212 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1213 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1214 usx = IROUND(a[0] * 65535.0F);
1215 usy = IROUND(a[1] * 65535.0F);
1216 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1217 = usx | (usy << 16);
1218 store_vector4(inst, machine, result);
1219 }
1220 break;
1221 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1222 {
1223 GLfloat a[4], result[4];
1224 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1225 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1226 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1227 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1228 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1229 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1230 ubx = IROUND(127.0F * a[0] + 128.0F);
1231 uby = IROUND(127.0F * a[1] + 128.0F);
1232 ubz = IROUND(127.0F * a[2] + 128.0F);
1233 ubw = IROUND(127.0F * a[3] + 128.0F);
1234 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1235 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1236 store_vector4(inst, machine, result);
1237 }
1238 break;
1239 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1240 {
1241 GLfloat a[4], result[4];
1242 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1243 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1244 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1245 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1246 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1247 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1248 ubx = IROUND(255.0F * a[0]);
1249 uby = IROUND(255.0F * a[1]);
1250 ubz = IROUND(255.0F * a[2]);
1251 ubw = IROUND(255.0F * a[3]);
1252 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1253 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1254 store_vector4(inst, machine, result);
1255 }
1256 break;
1257 case OPCODE_POW:
1258 {
1259 GLfloat a[4], b[4], result[4];
1260 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1261 fetch_vector1(ctx, &inst->SrcReg[1], machine, b);
1262 result[0] = result[1] = result[2] = result[3]
1263 = (GLfloat) _mesa_pow(a[0], b[0]);
1264 store_vector4(inst, machine, result);
1265 }
1266 break;
1267 case OPCODE_RCP:
1268 {
1269 GLfloat a[4], result[4];
1270 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1271 if (DEBUG_PROG) {
1272 if (a[0] == 0)
1273 printf("RCP(0)\n");
1274 else if (IS_INF_OR_NAN(a[0]))
1275 printf("RCP(inf)\n");
Brian13e3b212007-02-22 16:09:40 -07001276 }
Briane80d9012007-02-23 16:53:24 -07001277 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1278 store_vector4(inst, machine, result);
1279 }
1280 break;
1281 case OPCODE_RET: /* return from subroutine (conditional) */
1282 if (eval_condition(machine, inst)) {
1283 if (machine->StackDepth == 0) {
1284 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -07001285 }
Briane80d9012007-02-23 16:53:24 -07001286 pc = machine->CallStack[--machine->StackDepth];
1287 }
1288 break;
1289 case OPCODE_RFL: /* reflection vector */
1290 {
1291 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1292 fetch_vector4(ctx, &inst->SrcReg[0], machine, axis);
1293 fetch_vector4(ctx, &inst->SrcReg[1], machine, dir);
1294 tmpW = DOT3(axis, axis);
1295 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1296 result[0] = tmpX * axis[0] - dir[0];
1297 result[1] = tmpX * axis[1] - dir[1];
1298 result[2] = tmpX * axis[2] - dir[2];
1299 /* result[3] is never written! XXX enforce in parser! */
1300 store_vector4(inst, machine, result);
1301 }
1302 break;
1303 case OPCODE_RSQ: /* 1 / sqrt() */
1304 {
1305 GLfloat a[4], result[4];
1306 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1307 a[0] = FABSF(a[0]);
1308 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1309 store_vector4(inst, machine, result);
1310 if (DEBUG_PROG) {
1311 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
Brian13e3b212007-02-22 16:09:40 -07001312 }
Briane80d9012007-02-23 16:53:24 -07001313 }
1314 break;
1315 case OPCODE_SCS: /* sine and cos */
1316 {
1317 GLfloat a[4], result[4];
1318 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1319 result[0] = (GLfloat) _mesa_cos(a[0]);
1320 result[1] = (GLfloat) _mesa_sin(a[0]);
1321 result[2] = 0.0; /* undefined! */
1322 result[3] = 0.0; /* undefined! */
1323 store_vector4(inst, machine, result);
1324 }
1325 break;
1326 case OPCODE_SEQ: /* set on equal */
1327 {
1328 GLfloat a[4], b[4], result[4];
1329 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1330 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1331 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1332 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1333 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1334 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1335 store_vector4(inst, machine, result);
1336 }
1337 break;
1338 case OPCODE_SFL: /* set false, operands ignored */
1339 {
1340 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1341 store_vector4(inst, machine, result);
1342 }
1343 break;
1344 case OPCODE_SGE: /* set on greater or equal */
1345 {
1346 GLfloat a[4], b[4], result[4];
1347 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1348 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1349 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1350 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1351 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1352 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1353 store_vector4(inst, machine, result);
1354 }
1355 break;
1356 case OPCODE_SGT: /* set on greater */
1357 {
1358 GLfloat a[4], b[4], result[4];
1359 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1360 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1361 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1362 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1363 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1364 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1365 store_vector4(inst, machine, result);
1366 if (DEBUG_PROG) {
1367 printf("SGT %g %g %g %g\n",
1368 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001369 }
Briane80d9012007-02-23 16:53:24 -07001370 }
1371 break;
1372 case OPCODE_SIN:
1373 {
1374 GLfloat a[4], result[4];
1375 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1376 result[0] = result[1] = result[2] = result[3]
1377 = (GLfloat) _mesa_sin(a[0]);
1378 store_vector4(inst, machine, result);
1379 }
1380 break;
1381 case OPCODE_SLE: /* set on less or equal */
1382 {
1383 GLfloat a[4], b[4], result[4];
1384 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1385 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1386 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1387 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1388 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1389 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1390 store_vector4(inst, machine, result);
1391 }
1392 break;
1393 case OPCODE_SLT: /* set on less */
1394 {
1395 GLfloat a[4], b[4], result[4];
1396 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1397 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1398 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1399 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1400 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1401 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1402 store_vector4(inst, machine, result);
1403 }
1404 break;
1405 case OPCODE_SNE: /* set on not equal */
1406 {
1407 GLfloat a[4], b[4], result[4];
1408 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1409 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1410 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1411 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1412 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1413 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1414 store_vector4(inst, machine, result);
1415 }
1416 break;
1417 case OPCODE_STR: /* set true, operands ignored */
1418 {
1419 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1420 store_vector4(inst, machine, result);
1421 }
1422 break;
1423 case OPCODE_SUB:
1424 {
1425 GLfloat a[4], b[4], result[4];
1426 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1427 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1428 result[0] = a[0] - b[0];
1429 result[1] = a[1] - b[1];
1430 result[2] = a[2] - b[2];
1431 result[3] = a[3] - b[3];
1432 store_vector4(inst, machine, result);
1433 if (DEBUG_PROG) {
1434 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1435 result[0], result[1], result[2], result[3],
1436 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001437 }
Briane80d9012007-02-23 16:53:24 -07001438 }
1439 break;
1440 case OPCODE_SWZ: /* extended swizzle */
1441 {
1442 const struct prog_src_register *source = &inst->SrcReg[0];
1443 const GLfloat *src = get_register_pointer(ctx, source, machine);
1444 GLfloat result[4];
1445 GLuint i;
1446 for (i = 0; i < 4; i++) {
1447 const GLuint swz = GET_SWZ(source->Swizzle, i);
1448 if (swz == SWIZZLE_ZERO)
1449 result[i] = 0.0;
1450 else if (swz == SWIZZLE_ONE)
1451 result[i] = 1.0;
Brian13e3b212007-02-22 16:09:40 -07001452 else {
Briane80d9012007-02-23 16:53:24 -07001453 ASSERT(swz >= 0);
1454 ASSERT(swz <= 3);
1455 result[i] = src[swz];
Brian13e3b212007-02-22 16:09:40 -07001456 }
Briane80d9012007-02-23 16:53:24 -07001457 if (source->NegateBase & (1 << i))
1458 result[i] = -result[i];
Brian13e3b212007-02-22 16:09:40 -07001459 }
Briane80d9012007-02-23 16:53:24 -07001460 store_vector4(inst, machine, result);
1461 }
1462 break;
1463 case OPCODE_TEX: /* Both ARB and NV frag prog */
1464 /* Texel lookup */
1465 {
1466 /* Note: only use the precomputed lambda value when we're
1467 * sampling texture unit [K] with texcoord[K].
1468 * Otherwise, the lambda value may have no relation to the
1469 * instruction's texcoord or texture image. Using the wrong
1470 * lambda is usually bad news.
1471 * The rest of the time, just use zero (until we get a more
1472 * sophisticated way of computing lambda).
1473 */
1474 GLfloat coord[4], color[4], lambda;
1475#if 0
1476 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1477 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1478 lambda = span->array->lambda[inst->TexSrcUnit][column];
1479 else
1480#endif
1481 lambda = 0.0;
1482 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1483 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
1484 color);
1485 if (DEBUG_PROG) {
1486 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1487 "lod %f\n",
1488 color[0], color[1], color[2], color[3],
1489 inst->TexSrcUnit,
1490 coord[0], coord[1], coord[2], coord[3], lambda);
1491 }
1492 store_vector4(inst, machine, color);
1493 }
1494 break;
1495 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1496 /* Texel lookup with LOD bias */
1497 {
1498 const struct gl_texture_unit *texUnit
1499 = &ctx->Texture.Unit[inst->TexSrcUnit];
1500 GLfloat coord[4], color[4], lambda, bias;
1501#if 0
1502 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1503 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1504 lambda = span->array->lambda[inst->TexSrcUnit][column];
1505 else
1506#endif
1507 lambda = 0.0;
1508 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1509 /* coord[3] is the bias to add to lambda */
1510 bias = texUnit->LodBias + coord[3];
1511 if (texUnit->_Current)
1512 bias += texUnit->_Current->LodBias;
1513 machine->FetchTexelLod(ctx, coord, lambda + bias,
1514 inst->TexSrcUnit, color);
1515 store_vector4(inst, machine, color);
1516 }
1517 break;
1518 case OPCODE_TXD: /* GL_NV_fragment_program only */
1519 /* Texture lookup w/ partial derivatives for LOD */
1520 {
1521 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1522 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1523 fetch_vector4(ctx, &inst->SrcReg[1], machine, dtdx);
1524 fetch_vector4(ctx, &inst->SrcReg[2], machine, dtdy);
1525 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1526 inst->TexSrcUnit, color);
1527 store_vector4(inst, machine, color);
1528 }
1529 break;
1530 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1531 /* Texture lookup w/ projective divide */
1532 {
1533 GLfloat texcoord[4], color[4], lambda;
1534#if 0
1535 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1536 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1537 lambda = span->array->lambda[inst->TexSrcUnit][column];
1538 else
1539#endif
1540 lambda = 0.0;
1541 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1542 /* Not so sure about this test - if texcoord[3] is
1543 * zero, we'd probably be fine except for an ASSERT in
1544 * IROUND_POS() which gets triggered by the inf values created.
1545 */
1546 if (texcoord[3] != 0.0) {
1547 texcoord[0] /= texcoord[3];
1548 texcoord[1] /= texcoord[3];
1549 texcoord[2] /= texcoord[3];
1550 }
1551 machine->FetchTexelLod(ctx, texcoord, lambda,
1552 inst->TexSrcUnit, color);
1553 store_vector4(inst, machine, color);
1554 }
1555 break;
1556 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1557 /* Texture lookup w/ projective divide */
1558 {
1559 GLfloat texcoord[4], color[4], lambda;
1560#if 0
1561 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1562 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1563 lambda = span->array->lambda[inst->TexSrcUnit][column];
1564 else
1565#endif
1566 lambda = 0.0;
1567 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1568 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1569 texcoord[3] != 0.0) {
1570 texcoord[0] /= texcoord[3];
1571 texcoord[1] /= texcoord[3];
1572 texcoord[2] /= texcoord[3];
1573 }
1574 machine->FetchTexelLod(ctx, texcoord, lambda,
1575 inst->TexSrcUnit, color);
1576 store_vector4(inst, machine, color);
1577 }
1578 break;
1579 case OPCODE_UP2H: /* unpack two 16-bit floats */
1580 {
1581 GLfloat a[4], result[4];
1582 const GLuint *rawBits = (const GLuint *) a;
1583 GLhalfNV hx, hy;
1584 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1585 hx = rawBits[0] & 0xffff;
1586 hy = rawBits[0] >> 16;
1587 result[0] = result[2] = _mesa_half_to_float(hx);
1588 result[1] = result[3] = _mesa_half_to_float(hy);
1589 store_vector4(inst, machine, result);
1590 }
1591 break;
1592 case OPCODE_UP2US: /* unpack two GLushorts */
1593 {
1594 GLfloat a[4], result[4];
1595 const GLuint *rawBits = (const GLuint *) a;
1596 GLushort usx, usy;
1597 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1598 usx = rawBits[0] & 0xffff;
1599 usy = rawBits[0] >> 16;
1600 result[0] = result[2] = usx * (1.0f / 65535.0f);
1601 result[1] = result[3] = usy * (1.0f / 65535.0f);
1602 store_vector4(inst, machine, result);
1603 }
1604 break;
1605 case OPCODE_UP4B: /* unpack four GLbytes */
1606 {
1607 GLfloat a[4], result[4];
1608 const GLuint *rawBits = (const GLuint *) a;
1609 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1610 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1611 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1612 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1613 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1614 store_vector4(inst, machine, result);
1615 }
1616 break;
1617 case OPCODE_UP4UB: /* unpack four GLubytes */
1618 {
1619 GLfloat a[4], result[4];
1620 const GLuint *rawBits = (const GLuint *) a;
1621 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1622 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1623 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1624 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1625 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1626 store_vector4(inst, machine, result);
1627 }
1628 break;
1629 case OPCODE_XPD: /* cross product */
1630 {
1631 GLfloat a[4], b[4], result[4];
1632 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1633 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1634 result[0] = a[1] * b[2] - a[2] * b[1];
1635 result[1] = a[2] * b[0] - a[0] * b[2];
1636 result[2] = a[0] * b[1] - a[1] * b[0];
1637 result[3] = 1.0;
1638 store_vector4(inst, machine, result);
1639 }
1640 break;
1641 case OPCODE_X2D: /* 2-D matrix transform */
1642 {
1643 GLfloat a[4], b[4], c[4], result[4];
1644 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1645 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1646 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1647 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1648 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1649 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1650 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1651 store_vector4(inst, machine, result);
1652 }
1653 break;
1654 case OPCODE_PRINT:
1655 {
1656 if (inst->SrcReg[0].File != -1) {
1657 GLfloat a[4];
1658 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1659 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1660 a[0], a[1], a[2], a[3]);
1661 }
1662 else {
1663 _mesa_printf("%s\n", (const char *) inst->Data);
1664 }
1665 }
1666 break;
1667 case OPCODE_END:
1668 return GL_TRUE;
1669 default:
1670 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1671 inst->Opcode);
1672 return GL_TRUE; /* return value doesn't matter */
Brian13e3b212007-02-22 16:09:40 -07001673
1674 }
Briane80d9012007-02-23 16:53:24 -07001675
Brian13e3b212007-02-22 16:09:40 -07001676 total++;
1677 if (total > MAX_EXEC) {
1678 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1679 return GL_TRUE;
1680 abort();
1681 }
Briane80d9012007-02-23 16:53:24 -07001682
1683 } /* for pc */
Brian13e3b212007-02-22 16:09:40 -07001684
1685#if FEATURE_MESA_program_debug
1686 CurrentMachine = NULL;
1687#endif
1688
1689 return GL_TRUE;
1690}