blob: 6417a88e900557d6caeb8252efa76aba26aea876 [file] [log] [blame]
/*
 * Mesa 3-D graphics library
 * Version: 6.5.3
 *
 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24
/**
 * \file prog_execute.c
 * Software interpreter for vertex/fragment programs.
 * \author Brian Paul
 */
30
/*
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 */
36
37
38#include "glheader.h"
39#include "colormac.h"
40#include "context.h"
41#include "program.h"
42#include "prog_execute.h"
43#include "prog_instruction.h"
44#include "prog_parameter.h"
45#include "prog_print.h"
46#include "slang_library_noise.h"
47
48
/* See comments below for info about this */
#define LAMBDA_ZERO 1

/* Debug predicate: when non-zero, the "if (DEBUG_PROG)" trace output in
 * the interpreter is enabled.
 */
#define DEBUG_PROG 0


/**
 * Set x to positive or negative infinity.
 *
 * x must be an lvalue of type GLfloat.  Each macro expands to a single,
 * fully parenthesized assignment expression so it can be used safely as a
 * statement or inside a larger expression, with any lvalue argument.
 *
 * The IEEE variant stores the bit pattern of +/-infinity directly; the VMS
 * variant substitutes +/-__MAXFLOAT; all other platforms use HUGE_VAL.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the raw bits of float lvalue x with the integer value 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
71
72
73static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
74
75
76
#if FEATURE_MESA_program_debug
/** Machine state of the program currently being interpreted, or NULL. */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (used for error reporting)
 * \param file   register file to read (input, output or temporary)
 * \param index  register index within that file; not range-checked here
 * \param val    receives the four register components.  Left untouched
 *               when no machine is current or the file is unsupported.
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (!CurrentMachine)
      return;

   switch (file) {
   case PROGRAM_INPUT:
      if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
         COPY_4V(val, CurrentMachine->VertAttribs[index]);
      }
      else {
         /* fragment inputs are stored per element */
         COPY_4V(val,
                 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
      }
      break;
   case PROGRAM_OUTPUT:
      COPY_4V(val, CurrentMachine->Outputs[index]);
      break;
   case PROGRAM_TEMPORARY:
      COPY_4V(val, CurrentMachine->Temporaries[index]);
      break;
   default:
      _mesa_problem(ctx,
                    "bad register file in _mesa_get_program_register");
      break;
   }
}
#endif /* FEATURE_MESA_program_debug */
113
114
115
116/**
117 * Return a pointer to the 4-element float vector specified by the given
118 * source register.
119 */
120static INLINE const GLfloat *
Briane80d9012007-02-23 16:53:24 -0700121get_register_pointer(GLcontext * ctx,
122 const struct prog_src_register *source,
123 const struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -0700124{
Brianf183a2d2007-02-23 17:14:30 -0700125 if (source->RelAddr) {
126 const GLint reg = source->Index + machine->AddressReg[0][0];
127 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
128 (source->File == PROGRAM_STATE_VAR) );
129 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
130 return ZeroVec;
131 else if (source->File == PROGRAM_ENV_PARAM)
132 return ctx->VertexProgram.Parameters[reg];
133 else {
Brian761728a2007-02-24 11:14:57 -0700134 ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
135 source->File == PROGRAM_STATE_VAR);
Brianf183a2d2007-02-23 17:14:30 -0700136 return machine->CurProgram->Parameters->ParameterValues[reg];
137 }
138 }
139
Brian13e3b212007-02-22 16:09:40 -0700140 switch (source->File) {
141 case PROGRAM_TEMPORARY:
142 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
143 return machine->Temporaries[source->Index];
144
145 case PROGRAM_INPUT:
146 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
147 ASSERT(source->Index < VERT_ATTRIB_MAX);
148 return machine->VertAttribs[source->Index];
149 }
150 else {
151 ASSERT(source->Index < FRAG_ATTRIB_MAX);
152 return machine->Attribs[source->Index][machine->CurElement];
153 }
154
155 case PROGRAM_OUTPUT:
156 /* This is only for PRINT */
157 ASSERT(source->Index < FRAG_RESULT_MAX);
158 return machine->Outputs[source->Index];
159
160 case PROGRAM_LOCAL_PARAM:
161 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
162 return machine->CurProgram->LocalParams[source->Index];
163
164 case PROGRAM_ENV_PARAM:
165 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
166 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
167 return ctx->VertexProgram.Parameters[source->Index];
168 else
169 return ctx->FragmentProgram.Parameters[source->Index];
170
171 case PROGRAM_STATE_VAR:
172 /* Fallthrough */
173 case PROGRAM_CONSTANT:
174 /* Fallthrough */
175 case PROGRAM_UNIFORM:
176 /* Fallthrough */
177 case PROGRAM_NAMED_PARAM:
178 ASSERT(source->Index <
179 (GLint) machine->CurProgram->Parameters->NumParameters);
180 return machine->CurProgram->Parameters->ParameterValues[source->Index];
181
182 default:
183 _mesa_problem(ctx,
184 "Invalid input register file %d in get_register_pointer()",
185 source->File);
186 return NULL;
187 }
188}
189
190
191/**
192 * Fetch a 4-element float vector from the given source register.
193 * Apply swizzling and negating as needed.
194 */
195static void
Briane80d9012007-02-23 16:53:24 -0700196fetch_vector4(GLcontext * ctx,
197 const struct prog_src_register *source,
198 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700199{
200 const GLfloat *src = get_register_pointer(ctx, source, machine);
201 ASSERT(src);
202
203 if (source->Swizzle == SWIZZLE_NOOP) {
204 /* no swizzling */
205 COPY_4V(result, src);
206 }
207 else {
208 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
209 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
210 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
211 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
212 result[0] = src[GET_SWZ(source->Swizzle, 0)];
213 result[1] = src[GET_SWZ(source->Swizzle, 1)];
214 result[2] = src[GET_SWZ(source->Swizzle, 2)];
215 result[3] = src[GET_SWZ(source->Swizzle, 3)];
216 }
217
218 if (source->NegateBase) {
219 result[0] = -result[0];
220 result[1] = -result[1];
221 result[2] = -result[2];
222 result[3] = -result[3];
223 }
224 if (source->Abs) {
225 result[0] = FABSF(result[0]);
226 result[1] = FABSF(result[1]);
227 result[2] = FABSF(result[2]);
228 result[3] = FABSF(result[3]);
229 }
230 if (source->NegateAbs) {
231 result[0] = -result[0];
232 result[1] = -result[1];
233 result[2] = -result[2];
234 result[3] = -result[3];
235 }
236}
237
#if 0
/**
 * Compute the partial derivative (with respect to window X or Y) of a
 * fragment input register from the span's interpolation steps, then apply
 * the source register's swizzle and NegateBase/Abs/NegateAbs modifiers.
 *
 * \param xOrY    'X' or 'Y', selecting the derivative direction
 * \param column  fragment's column in the span (only used for the X
 *                derivative of texcoords/varyings)
 * \param result  receives the four derivative components
 * \return GL_TRUE if it was easily computed or GL_FALSE if we
 *         need to execute another instance of the program (ugh)!
 *         (This implementation always returns GL_TRUE.)
 */
static GLboolean
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const SWspan * span,
                    char xOrY, GLint column, GLfloat result[4])
{
   const GLboolean alongX = (xOrY == 'X');
   GLfloat deriv[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      deriv[0] = alongX ? 1.0 : 0.0;
      deriv[1] = alongX ? 0.0 : 1.0;
      if (alongX) {
         deriv[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         deriv[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
            / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      /* color steps are scaled by 1/CHAN_MAXF */
      for (i = 0; i < 4; i++) {
         if (alongX)
            deriv[i] = span->attrStepX[source->Index][i] * (1.0F / CHAN_MAXF);
         else
            deriv[i] = span->attrStepY[source->Index][i] * (1.0F / CHAN_MAXF);
      }
      break;
   case FRAG_ATTRIB_FOGC:
      /* only the first component carries a value */
      if (alongX)
         deriv[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      else
         deriv[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
      deriv[1] = 0.0;
      deriv[2] = 0.0;
      deriv[3] = 0.0;
      break;
   default:
      assert(source->Index < FRAG_ATTRIB_MAX);
      /* texcoord or varying */
      if (alongX) {
         /* this is a little tricky - I think I've got it right */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepX[source->Index][3]
                                      * column);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepX[source->Index][i] * invQ;
      }
      else {
         /* Tricky, as above, but in Y direction */
         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
                                      + span->attrStepY[source->Index][3]);
         for (i = 0; i < 4; i++)
            deriv[i] = span->attrStepY[source->Index][i] * invQ;
      }
      break;
   }

   /* swizzle, then the same modifier sequence as fetch_vector4() */
   for (i = 0; i < 4; i++)
      result[i] = deriv[GET_SWZ(source->Swizzle, i)];

   if (source->NegateBase) {
      for (i = 0; i < 4; i++)
         result[i] = -result[i];
   }
   if (source->Abs) {
      for (i = 0; i < 4; i++)
         result[i] = FABSF(result[i]);
   }
   if (source->NegateAbs) {
      for (i = 0; i < 4; i++)
         result[i] = -result[i];
   }
   return GL_TRUE;
}
#endif
352
353
354/**
355 * As above, but only return result[0] element.
356 */
357static void
Briane80d9012007-02-23 16:53:24 -0700358fetch_vector1(GLcontext * ctx,
359 const struct prog_src_register *source,
360 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700361{
362 const GLfloat *src = get_register_pointer(ctx, source, machine);
363 ASSERT(src);
364
365 result[0] = src[GET_SWZ(source->Swizzle, 0)];
366
367 if (source->NegateBase) {
368 result[0] = -result[0];
369 }
370 if (source->Abs) {
371 result[0] = FABSF(result[0]);
372 }
373 if (source->NegateAbs) {
374 result[0] = -result[0];
375 }
376}
377
378
379/**
380 * Test value against zero and return GT, LT, EQ or UN if NaN.
381 */
382static INLINE GLuint
Briane80d9012007-02-23 16:53:24 -0700383generate_cc(float value)
Brian13e3b212007-02-22 16:09:40 -0700384{
385 if (value != value)
Briane80d9012007-02-23 16:53:24 -0700386 return COND_UN; /* NaN */
Brian13e3b212007-02-22 16:09:40 -0700387 if (value > 0.0F)
388 return COND_GT;
389 if (value < 0.0F)
390 return COND_LT;
391 return COND_EQ;
392}
393
394
395/**
396 * Test if the ccMaskRule is satisfied by the given condition code.
397 * Used to mask destination writes according to the current condition code.
398 */
399static INLINE GLboolean
400test_cc(GLuint condCode, GLuint ccMaskRule)
401{
402 switch (ccMaskRule) {
403 case COND_EQ: return (condCode == COND_EQ);
404 case COND_NE: return (condCode != COND_EQ);
405 case COND_LT: return (condCode == COND_LT);
406 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
407 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
408 case COND_GT: return (condCode == COND_GT);
409 case COND_TR: return GL_TRUE;
410 case COND_FL: return GL_FALSE;
411 default: return GL_TRUE;
412 }
413}
414
415
416/**
417 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
418 * or GL_FALSE to indicate result.
419 */
420static INLINE GLboolean
421eval_condition(const struct gl_program_machine *machine,
422 const struct prog_instruction *inst)
423{
424 const GLuint swizzle = inst->DstReg.CondSwizzle;
425 const GLuint condMask = inst->DstReg.CondMask;
426 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
427 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
428 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
429 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
430 return GL_TRUE;
431 }
432 else {
433 return GL_FALSE;
434 }
435}
436
437
438
439/**
440 * Store 4 floats into a register. Observe the instructions saturate and
441 * set-condition-code flags.
442 */
443static void
Briane80d9012007-02-23 16:53:24 -0700444store_vector4(const struct prog_instruction *inst,
445 struct gl_program_machine *machine, const GLfloat value[4])
Brian13e3b212007-02-22 16:09:40 -0700446{
447 const struct prog_dst_register *dest = &(inst->DstReg);
448 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
449 GLfloat *dstReg;
450 GLfloat dummyReg[4];
451 GLfloat clampedValue[4];
452 GLuint writeMask = dest->WriteMask;
453
454 switch (dest->File) {
Briane80d9012007-02-23 16:53:24 -0700455 case PROGRAM_OUTPUT:
456 dstReg = machine->Outputs[dest->Index];
457 break;
458 case PROGRAM_TEMPORARY:
459 dstReg = machine->Temporaries[dest->Index];
460 break;
461 case PROGRAM_WRITE_ONLY:
462 dstReg = dummyReg;
463 return;
464 default:
465 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
466 return;
Brian13e3b212007-02-22 16:09:40 -0700467 }
468
469#if 0
470 if (value[0] > 1.0e10 ||
471 IS_INF_OR_NAN(value[0]) ||
472 IS_INF_OR_NAN(value[1]) ||
Briane80d9012007-02-23 16:53:24 -0700473 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
Brian13e3b212007-02-22 16:09:40 -0700474 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
475#endif
476
477 if (clamp) {
478 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
479 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
480 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
481 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
482 value = clampedValue;
483 }
484
485 if (dest->CondMask != COND_TR) {
486 /* condition codes may turn off some writes */
487 if (writeMask & WRITEMASK_X) {
488 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
489 dest->CondMask))
490 writeMask &= ~WRITEMASK_X;
491 }
492 if (writeMask & WRITEMASK_Y) {
493 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
494 dest->CondMask))
495 writeMask &= ~WRITEMASK_Y;
496 }
497 if (writeMask & WRITEMASK_Z) {
498 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
499 dest->CondMask))
500 writeMask &= ~WRITEMASK_Z;
501 }
502 if (writeMask & WRITEMASK_W) {
503 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
504 dest->CondMask))
505 writeMask &= ~WRITEMASK_W;
506 }
507 }
508
509 if (writeMask & WRITEMASK_X)
510 dstReg[0] = value[0];
511 if (writeMask & WRITEMASK_Y)
512 dstReg[1] = value[1];
513 if (writeMask & WRITEMASK_Z)
514 dstReg[2] = value[2];
515 if (writeMask & WRITEMASK_W)
516 dstReg[3] = value[3];
517
518 if (inst->CondUpdate) {
519 if (writeMask & WRITEMASK_X)
520 machine->CondCodes[0] = generate_cc(value[0]);
521 if (writeMask & WRITEMASK_Y)
522 machine->CondCodes[1] = generate_cc(value[1]);
523 if (writeMask & WRITEMASK_Z)
524 machine->CondCodes[2] = generate_cc(value[2]);
525 if (writeMask & WRITEMASK_W)
526 machine->CondCodes[3] = generate_cc(value[3]);
527 }
528}
529
530
#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * All modifications are made through dMachine; the source machine is only
 * read.  NOTE(review): if the Attribs storage is shared between the two
 * machine structs (e.g. held by pointer), the increments below would still
 * be visible through 'machine' - confirm before enabling this code.
 *
 * \param machine   the machine state to copy from
 * \param program   fragment program; its InputsRead mask selects which
 *                  inputs get a derivative added
 * \param span      supplies the per-attribute X and Y steps
 * \param xOrY      'X' or 'Y', selecting which step to add
 * \param dMachine  the machine state to initialize
 */
static void
init_machine_deriv(GLcontext * ctx,
                   const struct gl_program_machine *machine,
                   const struct gl_fragment_program *program,
                   const SWspan * span, char xOrY,
                   struct gl_program_machine *dMachine)
{
   const GLboolean alongX = (xOrY == 'X');
   GLuint attr, i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero((void *) dMachine->Temporaries,
                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos =
         dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      if (alongX) {
         wpos[0] += 1.0F;
         wpos[1] += 0.0F;
         wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
      }
      else {
         wpos[0] += 0.0F;
         wpos[1] += 1.0F;
         wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
         wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
      }
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         for (i = 0; i < 4; i++) {
            if (alongX)
               col[i] += span->attrStepX[attr][i] * (1.0F / CHAN_MAXF);
            else
               col[i] += span->attrStepY[attr][i] * (1.0F / CHAN_MAXF);
         }
      }
   }

   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc =
         dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      if (alongX)
         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
      else
         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
   }

   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         /* XXX perspective-correct interpolation */
         for (i = 0; i < 4; i++) {
            if (alongX)
               val[i] += span->attrStepX[attr][i];
            else
               val[i] += span->attrStepY[attr][i];
         }
      }
   }

   /* init condition codes */
   for (i = 0; i < 4; i++)
      dMachine->CondCodes[i] = COND_EQ;
}
#endif
629
630
631/**
632 * Execute the given vertex/fragment program.
633 *
634 * \param ctx - rendering context
635 * \param program - the fragment program to execute
636 * \param machine - machine state (register file)
637 * \param maxInst - max number of instructions to execute
638 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
639 */
640GLboolean
Briane80d9012007-02-23 16:53:24 -0700641_mesa_execute_program(GLcontext * ctx,
Brian13e3b212007-02-22 16:09:40 -0700642 const struct gl_program *program, GLuint maxInst,
643 struct gl_program_machine *machine, GLuint element)
644{
645 const GLuint MAX_EXEC = 10000;
646 GLint pc, total = 0;
647
648 machine->CurProgram = program;
649
650 if (DEBUG_PROG) {
651 printf("execute program %u --------------------\n", program->Id);
652 }
653
654#if FEATURE_MESA_program_debug
655 CurrentMachine = machine;
656#endif
657
658 for (pc = 0; pc < maxInst; pc++) {
659 const struct prog_instruction *inst = program->Instructions + pc;
660
661#if FEATURE_MESA_program_debug
662 if (ctx->FragmentProgram.CallbackEnabled &&
663 ctx->FragmentProgram.Callback) {
664 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
665 ctx->FragmentProgram.Callback(program->Target,
666 ctx->FragmentProgram.CallbackData);
667 }
668#endif
669
670 if (DEBUG_PROG) {
671 _mesa_print_instruction(inst);
672 }
673
674 switch (inst->Opcode) {
Briane80d9012007-02-23 16:53:24 -0700675 case OPCODE_ABS:
676 {
677 GLfloat a[4], result[4];
678 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
679 result[0] = FABSF(a[0]);
680 result[1] = FABSF(a[1]);
681 result[2] = FABSF(a[2]);
682 result[3] = FABSF(a[3]);
683 store_vector4(inst, machine, result);
684 }
685 break;
686 case OPCODE_ADD:
687 {
688 GLfloat a[4], b[4], result[4];
689 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
690 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
691 result[0] = a[0] + b[0];
692 result[1] = a[1] + b[1];
693 result[2] = a[2] + b[2];
694 result[3] = a[3] + b[3];
695 store_vector4(inst, machine, result);
696 if (DEBUG_PROG) {
697 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
698 result[0], result[1], result[2], result[3],
699 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700700 }
Briane80d9012007-02-23 16:53:24 -0700701 }
702 break;
Brianf183a2d2007-02-23 17:14:30 -0700703 case OPCODE_ARL:
704 {
705 GLfloat t[4];
706 fetch_vector4(ctx, &inst->SrcReg[0], machine, t);
707 machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
708 }
709 break;
Briane80d9012007-02-23 16:53:24 -0700710 case OPCODE_BGNLOOP:
711 /* no-op */
712 break;
713 case OPCODE_ENDLOOP:
714 /* subtract 1 here since pc is incremented by for(pc) loop */
715 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
716 break;
717 case OPCODE_BGNSUB: /* begin subroutine */
718 break;
719 case OPCODE_ENDSUB: /* end subroutine */
720 break;
721 case OPCODE_BRA: /* branch (conditional) */
722 /* fall-through */
723 case OPCODE_BRK: /* break out of loop (conditional) */
724 /* fall-through */
725 case OPCODE_CONT: /* continue loop (conditional) */
726 if (eval_condition(machine, inst)) {
727 /* take branch */
728 /* Subtract 1 here since we'll do pc++ at end of for-loop */
729 pc = inst->BranchTarget - 1;
730 }
731 break;
732 case OPCODE_CAL: /* Call subroutine (conditional) */
733 if (eval_condition(machine, inst)) {
734 /* call the subroutine */
735 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
736 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -0700737 }
Briane80d9012007-02-23 16:53:24 -0700738 machine->CallStack[machine->StackDepth++] = pc + 1;
739 pc = inst->BranchTarget; /* XXX - 1 ??? */
740 }
741 break;
742 case OPCODE_CMP:
743 {
744 GLfloat a[4], b[4], c[4], result[4];
745 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
746 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
747 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
748 result[0] = a[0] < 0.0F ? b[0] : c[0];
749 result[1] = a[1] < 0.0F ? b[1] : c[1];
750 result[2] = a[2] < 0.0F ? b[2] : c[2];
751 result[3] = a[3] < 0.0F ? b[3] : c[3];
752 store_vector4(inst, machine, result);
753 }
754 break;
755 case OPCODE_COS:
756 {
757 GLfloat a[4], result[4];
758 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
759 result[0] = result[1] = result[2] = result[3]
760 = (GLfloat) _mesa_cos(a[0]);
761 store_vector4(inst, machine, result);
762 }
763 break;
764 case OPCODE_DDX: /* Partial derivative with respect to X */
765 {
Brian13e3b212007-02-22 16:09:40 -0700766#if 0
Briane80d9012007-02-23 16:53:24 -0700767 GLfloat a[4], aNext[4], result[4];
768 struct gl_program_machine dMachine;
769 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
770 column, result)) {
771 /* This is tricky. Make a copy of the current machine state,
772 * increment the input registers by the dx or dy partial
773 * derivatives, then re-execute the program up to the
774 * preceeding instruction, then fetch the source register.
775 * Finally, find the difference in the register values for
776 * the original and derivative runs.
777 */
778 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
779 init_machine_deriv(ctx, machine, program, span,
780 'X', &dMachine);
781 execute_program(ctx, program, pc, &dMachine, span, column);
782 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
783 aNext);
784 result[0] = aNext[0] - a[0];
785 result[1] = aNext[1] - a[1];
786 result[2] = aNext[2] - a[2];
787 result[3] = aNext[3] - a[3];
Brian13e3b212007-02-22 16:09:40 -0700788 }
Briane80d9012007-02-23 16:53:24 -0700789 store_vector4(inst, machine, result);
790#else
Brianf183a2d2007-02-23 17:14:30 -0700791 store_vector4(inst, machine, ZeroVec);
Briane80d9012007-02-23 16:53:24 -0700792#endif
793 }
794 break;
795 case OPCODE_DDY: /* Partial derivative with respect to Y */
796 {
Brian13e3b212007-02-22 16:09:40 -0700797#if 0
Briane80d9012007-02-23 16:53:24 -0700798 GLfloat a[4], aNext[4], result[4];
799 struct gl_program_machine dMachine;
800 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
801 column, result)) {
802 init_machine_deriv(ctx, machine, program, span,
803 'Y', &dMachine);
804 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
805 execute_program(ctx, program, pc, &dMachine, span, column);
806 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
807 aNext);
808 result[0] = aNext[0] - a[0];
809 result[1] = aNext[1] - a[1];
810 result[2] = aNext[2] - a[2];
811 result[3] = aNext[3] - a[3];
812 }
813 store_vector4(inst, machine, result);
Brian13e3b212007-02-22 16:09:40 -0700814#else
Brianf183a2d2007-02-23 17:14:30 -0700815 store_vector4(inst, machine, ZeroVec);
Brian13e3b212007-02-22 16:09:40 -0700816#endif
Briane80d9012007-02-23 16:53:24 -0700817 }
818 break;
819 case OPCODE_DP3:
820 {
821 GLfloat a[4], b[4], result[4];
822 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
823 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
824 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
825 store_vector4(inst, machine, result);
826 if (DEBUG_PROG) {
827 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
828 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian13e3b212007-02-22 16:09:40 -0700829 }
Briane80d9012007-02-23 16:53:24 -0700830 }
831 break;
832 case OPCODE_DP4:
833 {
834 GLfloat a[4], b[4], result[4];
835 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
836 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
837 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
838 store_vector4(inst, machine, result);
839 if (DEBUG_PROG) {
840 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
841 result[0], a[0], a[1], a[2], a[3],
842 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700843 }
Briane80d9012007-02-23 16:53:24 -0700844 }
845 break;
846 case OPCODE_DPH:
847 {
848 GLfloat a[4], b[4], result[4];
849 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
850 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
851 result[0] = result[1] = result[2] = result[3] =
852 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
853 store_vector4(inst, machine, result);
854 }
855 break;
856 case OPCODE_DST: /* Distance vector */
857 {
858 GLfloat a[4], b[4], result[4];
859 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
860 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
861 result[0] = 1.0F;
862 result[1] = a[1] * b[1];
863 result[2] = a[2];
864 result[3] = b[3];
865 store_vector4(inst, machine, result);
866 }
867 break;
Brianf183a2d2007-02-23 17:14:30 -0700868 case OPCODE_EXP:
Brianf183a2d2007-02-23 17:14:30 -0700869 {
870 GLfloat t[4], q[4], floor_t0;
871 fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
872 floor_t0 = FLOORF(t[0]);
873 if (floor_t0 > FLT_MAX_EXP) {
874 SET_POS_INFINITY(q[0]);
875 SET_POS_INFINITY(q[2]);
876 }
877 else if (floor_t0 < FLT_MIN_EXP) {
878 q[0] = 0.0F;
879 q[2] = 0.0F;
880 }
881 else {
Brian761728a2007-02-24 11:14:57 -0700882 q[0] = LDEXPF(1.0, (int) floor_t0);
883 /* Note: GL_NV_vertex_program expects
884 * result.z = result.x * APPX(result.y)
885 * We do what the ARB extension says.
886 */
887 q[2] = pow(2.0, t[0]);
Brianf183a2d2007-02-23 17:14:30 -0700888 }
889 q[1] = t[0] - floor_t0;
890 q[3] = 1.0F;
891 store_vector4( inst, machine, q );
892 }
893 break;
Briane80d9012007-02-23 16:53:24 -0700894 case OPCODE_EX2: /* Exponential base 2 */
895 {
896 GLfloat a[4], result[4];
897 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
898 result[0] = result[1] = result[2] = result[3] =
899 (GLfloat) _mesa_pow(2.0, a[0]);
900 store_vector4(inst, machine, result);
901 }
902 break;
903 case OPCODE_FLR:
904 {
905 GLfloat a[4], result[4];
906 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
907 result[0] = FLOORF(a[0]);
908 result[1] = FLOORF(a[1]);
909 result[2] = FLOORF(a[2]);
910 result[3] = FLOORF(a[3]);
911 store_vector4(inst, machine, result);
912 }
913 break;
914 case OPCODE_FRC:
915 {
916 GLfloat a[4], result[4];
917 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
918 result[0] = a[0] - FLOORF(a[0]);
919 result[1] = a[1] - FLOORF(a[1]);
920 result[2] = a[2] - FLOORF(a[2]);
921 result[3] = a[3] - FLOORF(a[3]);
922 store_vector4(inst, machine, result);
923 }
924 break;
925 case OPCODE_IF:
926 if (eval_condition(machine, inst)) {
927 /* do if-clause (just continue execution) */
928 }
929 else {
930 /* go to the instruction after ELSE or ENDIF */
Brian13e3b212007-02-22 16:09:40 -0700931 assert(inst->BranchTarget >= 0);
932 pc = inst->BranchTarget - 1;
Briane80d9012007-02-23 16:53:24 -0700933 }
934 break;
935 case OPCODE_ELSE:
936 /* goto ENDIF */
937 assert(inst->BranchTarget >= 0);
938 pc = inst->BranchTarget - 1;
939 break;
940 case OPCODE_ENDIF:
941 /* nothing */
942 break;
943 case OPCODE_INT: /* float to int */
944 {
945 GLfloat a[4], result[4];
946 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
947 result[0] = (GLfloat) (GLint) a[0];
948 result[1] = (GLfloat) (GLint) a[1];
949 result[2] = (GLfloat) (GLint) a[2];
950 result[3] = (GLfloat) (GLint) a[3];
951 store_vector4(inst, machine, result);
952 }
953 break;
954 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
955 if (eval_condition(machine, inst)) {
956 return GL_FALSE;
957 }
958 break;
959 case OPCODE_KIL: /* ARB_f_p only */
960 {
961 GLfloat a[4];
962 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
963 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
Brian13e3b212007-02-22 16:09:40 -0700964 return GL_FALSE;
965 }
Briane80d9012007-02-23 16:53:24 -0700966 }
967 break;
968 case OPCODE_LG2: /* log base 2 */
969 {
970 GLfloat a[4], result[4];
971 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
972 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
973 store_vector4(inst, machine, result);
974 }
975 break;
976 case OPCODE_LIT:
977 {
978 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
979 GLfloat a[4], result[4];
980 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
981 a[0] = MAX2(a[0], 0.0F);
982 a[1] = MAX2(a[1], 0.0F);
983 /* XXX ARB version clamps a[3], NV version doesn't */
984 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
985 result[0] = 1.0F;
986 result[1] = a[0];
987 /* XXX we could probably just use pow() here */
988 if (a[0] > 0.0F) {
989 if (a[1] == 0.0 && a[3] == 0.0)
990 result[2] = 1.0;
991 else
992 result[2] = EXPF(a[3] * LOGF(a[1]));
Brian13e3b212007-02-22 16:09:40 -0700993 }
Briane80d9012007-02-23 16:53:24 -0700994 else {
995 result[2] = 0.0;
Brian13e3b212007-02-22 16:09:40 -0700996 }
Briane80d9012007-02-23 16:53:24 -0700997 result[3] = 1.0F;
998 store_vector4(inst, machine, result);
999 if (DEBUG_PROG) {
1000 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1001 result[0], result[1], result[2], result[3],
1002 a[0], a[1], a[2], a[3]);
Brian13e3b212007-02-22 16:09:40 -07001003 }
Briane80d9012007-02-23 16:53:24 -07001004 }
1005 break;
Brianf183a2d2007-02-23 17:14:30 -07001006 case OPCODE_LOG:
1007 {
1008 GLfloat t[4], q[4], abs_t0;
1009 fetch_vector1(ctx, &inst->SrcReg[0], machine, t);
1010 abs_t0 = FABSF(t[0]);
1011 if (abs_t0 != 0.0F) {
1012 /* Since we really can't handle infinite values on VMS
1013 * like other OSes we'll use __MAXFLOAT to represent
1014 * infinity. This may need some tweaking.
1015 */
1016#ifdef VMS
1017 if (abs_t0 == __MAXFLOAT)
1018#else
1019 if (IS_INF_OR_NAN(abs_t0))
1020#endif
1021 {
1022 SET_POS_INFINITY(q[0]);
1023 q[1] = 1.0F;
1024 SET_POS_INFINITY(q[2]);
1025 }
1026 else {
1027 int exponent;
1028 GLfloat mantissa = FREXPF(t[0], &exponent);
1029 q[0] = (GLfloat) (exponent - 1);
1030 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1031 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1032 }
1033 }
1034 else {
1035 SET_NEG_INFINITY(q[0]);
1036 q[1] = 1.0F;
1037 SET_NEG_INFINITY(q[2]);
1038 }
1039 q[3] = 1.0;
1040 store_vector4(inst, machine, q);
1041 }
1042 break;
Briane80d9012007-02-23 16:53:24 -07001043 case OPCODE_LRP:
1044 {
1045 GLfloat a[4], b[4], c[4], result[4];
1046 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1047 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1048 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1049 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1050 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1051 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1052 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1053 store_vector4(inst, machine, result);
1054 if (DEBUG_PROG) {
1055 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1056 "(%g %g %g %g), (%g %g %g %g)\n",
1057 result[0], result[1], result[2], result[3],
1058 a[0], a[1], a[2], a[3],
1059 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001060 }
Briane80d9012007-02-23 16:53:24 -07001061 }
1062 break;
1063 case OPCODE_MAD:
1064 {
1065 GLfloat a[4], b[4], c[4], result[4];
1066 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1067 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1068 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1069 result[0] = a[0] * b[0] + c[0];
1070 result[1] = a[1] * b[1] + c[1];
1071 result[2] = a[2] * b[2] + c[2];
1072 result[3] = a[3] * b[3] + c[3];
1073 store_vector4(inst, machine, result);
1074 if (DEBUG_PROG) {
1075 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1076 "(%g %g %g %g) + (%g %g %g %g)\n",
1077 result[0], result[1], result[2], result[3],
1078 a[0], a[1], a[2], a[3],
1079 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001080 }
Briane80d9012007-02-23 16:53:24 -07001081 }
1082 break;
1083 case OPCODE_MAX:
1084 {
1085 GLfloat a[4], b[4], result[4];
1086 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1087 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1088 result[0] = MAX2(a[0], b[0]);
1089 result[1] = MAX2(a[1], b[1]);
1090 result[2] = MAX2(a[2], b[2]);
1091 result[3] = MAX2(a[3], b[3]);
1092 store_vector4(inst, machine, result);
1093 if (DEBUG_PROG) {
1094 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1095 result[0], result[1], result[2], result[3],
1096 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001097 }
Briane80d9012007-02-23 16:53:24 -07001098 }
1099 break;
1100 case OPCODE_MIN:
1101 {
1102 GLfloat a[4], b[4], result[4];
1103 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1104 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1105 result[0] = MIN2(a[0], b[0]);
1106 result[1] = MIN2(a[1], b[1]);
1107 result[2] = MIN2(a[2], b[2]);
1108 result[3] = MIN2(a[3], b[3]);
1109 store_vector4(inst, machine, result);
1110 }
1111 break;
1112 case OPCODE_MOV:
1113 {
1114 GLfloat result[4];
1115 fetch_vector4(ctx, &inst->SrcReg[0], machine, result);
1116 store_vector4(inst, machine, result);
1117 if (DEBUG_PROG) {
1118 printf("MOV (%g %g %g %g)\n",
1119 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001120 }
Briane80d9012007-02-23 16:53:24 -07001121 }
1122 break;
1123 case OPCODE_MUL:
1124 {
1125 GLfloat a[4], b[4], result[4];
1126 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1127 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1128 result[0] = a[0] * b[0];
1129 result[1] = a[1] * b[1];
1130 result[2] = a[2] * b[2];
1131 result[3] = a[3] * b[3];
1132 store_vector4(inst, machine, result);
1133 if (DEBUG_PROG) {
1134 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1135 result[0], result[1], result[2], result[3],
1136 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001137 }
Briane80d9012007-02-23 16:53:24 -07001138 }
1139 break;
1140 case OPCODE_NOISE1:
1141 {
1142 GLfloat a[4], result[4];
1143 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1144 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001145 result[1] =
Briane80d9012007-02-23 16:53:24 -07001146 result[2] = result[3] = _slang_library_noise1(a[0]);
1147 store_vector4(inst, machine, result);
1148 }
1149 break;
1150 case OPCODE_NOISE2:
1151 {
1152 GLfloat a[4], result[4];
1153 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1154 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001155 result[1] =
Briane80d9012007-02-23 16:53:24 -07001156 result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1157 store_vector4(inst, machine, result);
1158 }
1159 break;
1160 case OPCODE_NOISE3:
1161 {
1162 GLfloat a[4], result[4];
1163 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1164 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001165 result[1] =
1166 result[2] =
1167 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Briane80d9012007-02-23 16:53:24 -07001168 store_vector4(inst, machine, result);
1169 }
1170 break;
1171 case OPCODE_NOISE4:
1172 {
1173 GLfloat a[4], result[4];
1174 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1175 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001176 result[1] =
1177 result[2] =
1178 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Briane80d9012007-02-23 16:53:24 -07001179 store_vector4(inst, machine, result);
1180 }
1181 break;
1182 case OPCODE_NOP:
1183 break;
1184 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1185 {
1186 GLfloat a[4], result[4];
1187 GLhalfNV hx, hy;
1188 GLuint *rawResult = (GLuint *) result;
1189 GLuint twoHalves;
1190 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1191 hx = _mesa_float_to_half(a[0]);
1192 hy = _mesa_float_to_half(a[1]);
1193 twoHalves = hx | (hy << 16);
1194 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1195 = twoHalves;
1196 store_vector4(inst, machine, result);
1197 }
1198 break;
1199 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1200 {
1201 GLfloat a[4], result[4];
1202 GLuint usx, usy, *rawResult = (GLuint *) result;
1203 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1204 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1205 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1206 usx = IROUND(a[0] * 65535.0F);
1207 usy = IROUND(a[1] * 65535.0F);
1208 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1209 = usx | (usy << 16);
1210 store_vector4(inst, machine, result);
1211 }
1212 break;
1213 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1214 {
1215 GLfloat a[4], result[4];
1216 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1217 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1218 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1219 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1220 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1221 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1222 ubx = IROUND(127.0F * a[0] + 128.0F);
1223 uby = IROUND(127.0F * a[1] + 128.0F);
1224 ubz = IROUND(127.0F * a[2] + 128.0F);
1225 ubw = IROUND(127.0F * a[3] + 128.0F);
1226 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1227 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1228 store_vector4(inst, machine, result);
1229 }
1230 break;
1231 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1232 {
1233 GLfloat a[4], result[4];
1234 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1235 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1236 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1237 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1238 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1239 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1240 ubx = IROUND(255.0F * a[0]);
1241 uby = IROUND(255.0F * a[1]);
1242 ubz = IROUND(255.0F * a[2]);
1243 ubw = IROUND(255.0F * a[3]);
1244 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1245 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1246 store_vector4(inst, machine, result);
1247 }
1248 break;
1249 case OPCODE_POW:
1250 {
1251 GLfloat a[4], b[4], result[4];
1252 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1253 fetch_vector1(ctx, &inst->SrcReg[1], machine, b);
1254 result[0] = result[1] = result[2] = result[3]
1255 = (GLfloat) _mesa_pow(a[0], b[0]);
1256 store_vector4(inst, machine, result);
1257 }
1258 break;
1259 case OPCODE_RCP:
1260 {
1261 GLfloat a[4], result[4];
1262 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1263 if (DEBUG_PROG) {
1264 if (a[0] == 0)
1265 printf("RCP(0)\n");
1266 else if (IS_INF_OR_NAN(a[0]))
1267 printf("RCP(inf)\n");
Brian13e3b212007-02-22 16:09:40 -07001268 }
Briane80d9012007-02-23 16:53:24 -07001269 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1270 store_vector4(inst, machine, result);
1271 }
1272 break;
1273 case OPCODE_RET: /* return from subroutine (conditional) */
1274 if (eval_condition(machine, inst)) {
1275 if (machine->StackDepth == 0) {
1276 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -07001277 }
Briane80d9012007-02-23 16:53:24 -07001278 pc = machine->CallStack[--machine->StackDepth];
1279 }
1280 break;
1281 case OPCODE_RFL: /* reflection vector */
1282 {
1283 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1284 fetch_vector4(ctx, &inst->SrcReg[0], machine, axis);
1285 fetch_vector4(ctx, &inst->SrcReg[1], machine, dir);
1286 tmpW = DOT3(axis, axis);
1287 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1288 result[0] = tmpX * axis[0] - dir[0];
1289 result[1] = tmpX * axis[1] - dir[1];
1290 result[2] = tmpX * axis[2] - dir[2];
1291 /* result[3] is never written! XXX enforce in parser! */
1292 store_vector4(inst, machine, result);
1293 }
1294 break;
1295 case OPCODE_RSQ: /* 1 / sqrt() */
1296 {
1297 GLfloat a[4], result[4];
1298 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1299 a[0] = FABSF(a[0]);
1300 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1301 store_vector4(inst, machine, result);
1302 if (DEBUG_PROG) {
1303 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
Brian13e3b212007-02-22 16:09:40 -07001304 }
Briane80d9012007-02-23 16:53:24 -07001305 }
1306 break;
1307 case OPCODE_SCS: /* sine and cos */
1308 {
1309 GLfloat a[4], result[4];
1310 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1311 result[0] = (GLfloat) _mesa_cos(a[0]);
1312 result[1] = (GLfloat) _mesa_sin(a[0]);
1313 result[2] = 0.0; /* undefined! */
1314 result[3] = 0.0; /* undefined! */
1315 store_vector4(inst, machine, result);
1316 }
1317 break;
1318 case OPCODE_SEQ: /* set on equal */
1319 {
1320 GLfloat a[4], b[4], result[4];
1321 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1322 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1323 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1324 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1325 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1326 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1327 store_vector4(inst, machine, result);
1328 }
1329 break;
1330 case OPCODE_SFL: /* set false, operands ignored */
1331 {
1332 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1333 store_vector4(inst, machine, result);
1334 }
1335 break;
1336 case OPCODE_SGE: /* set on greater or equal */
1337 {
1338 GLfloat a[4], b[4], result[4];
1339 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1340 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1341 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1342 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1343 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1344 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1345 store_vector4(inst, machine, result);
1346 }
1347 break;
1348 case OPCODE_SGT: /* set on greater */
1349 {
1350 GLfloat a[4], b[4], result[4];
1351 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1352 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1353 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1354 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1355 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1356 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1357 store_vector4(inst, machine, result);
1358 if (DEBUG_PROG) {
1359 printf("SGT %g %g %g %g\n",
1360 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001361 }
Briane80d9012007-02-23 16:53:24 -07001362 }
1363 break;
1364 case OPCODE_SIN:
1365 {
1366 GLfloat a[4], result[4];
1367 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1368 result[0] = result[1] = result[2] = result[3]
1369 = (GLfloat) _mesa_sin(a[0]);
1370 store_vector4(inst, machine, result);
1371 }
1372 break;
1373 case OPCODE_SLE: /* set on less or equal */
1374 {
1375 GLfloat a[4], b[4], result[4];
1376 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1377 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1378 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1379 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1380 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1381 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1382 store_vector4(inst, machine, result);
1383 }
1384 break;
1385 case OPCODE_SLT: /* set on less */
1386 {
1387 GLfloat a[4], b[4], result[4];
1388 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1389 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1390 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1391 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1392 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1393 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1394 store_vector4(inst, machine, result);
1395 }
1396 break;
1397 case OPCODE_SNE: /* set on not equal */
1398 {
1399 GLfloat a[4], b[4], result[4];
1400 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1401 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1402 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1403 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1404 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1405 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1406 store_vector4(inst, machine, result);
1407 }
1408 break;
1409 case OPCODE_STR: /* set true, operands ignored */
1410 {
1411 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1412 store_vector4(inst, machine, result);
1413 }
1414 break;
1415 case OPCODE_SUB:
1416 {
1417 GLfloat a[4], b[4], result[4];
1418 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1419 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1420 result[0] = a[0] - b[0];
1421 result[1] = a[1] - b[1];
1422 result[2] = a[2] - b[2];
1423 result[3] = a[3] - b[3];
1424 store_vector4(inst, machine, result);
1425 if (DEBUG_PROG) {
1426 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1427 result[0], result[1], result[2], result[3],
1428 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001429 }
Briane80d9012007-02-23 16:53:24 -07001430 }
1431 break;
1432 case OPCODE_SWZ: /* extended swizzle */
1433 {
1434 const struct prog_src_register *source = &inst->SrcReg[0];
1435 const GLfloat *src = get_register_pointer(ctx, source, machine);
1436 GLfloat result[4];
1437 GLuint i;
1438 for (i = 0; i < 4; i++) {
1439 const GLuint swz = GET_SWZ(source->Swizzle, i);
1440 if (swz == SWIZZLE_ZERO)
1441 result[i] = 0.0;
1442 else if (swz == SWIZZLE_ONE)
1443 result[i] = 1.0;
Brian13e3b212007-02-22 16:09:40 -07001444 else {
Briane80d9012007-02-23 16:53:24 -07001445 ASSERT(swz >= 0);
1446 ASSERT(swz <= 3);
1447 result[i] = src[swz];
Brian13e3b212007-02-22 16:09:40 -07001448 }
Briane80d9012007-02-23 16:53:24 -07001449 if (source->NegateBase & (1 << i))
1450 result[i] = -result[i];
Brian13e3b212007-02-22 16:09:40 -07001451 }
Briane80d9012007-02-23 16:53:24 -07001452 store_vector4(inst, machine, result);
1453 }
1454 break;
1455 case OPCODE_TEX: /* Both ARB and NV frag prog */
1456 /* Texel lookup */
1457 {
1458 /* Note: only use the precomputed lambda value when we're
1459 * sampling texture unit [K] with texcoord[K].
1460 * Otherwise, the lambda value may have no relation to the
1461 * instruction's texcoord or texture image. Using the wrong
1462 * lambda is usually bad news.
1463 * The rest of the time, just use zero (until we get a more
1464 * sophisticated way of computing lambda).
1465 */
1466 GLfloat coord[4], color[4], lambda;
1467#if 0
1468 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1469 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1470 lambda = span->array->lambda[inst->TexSrcUnit][column];
1471 else
1472#endif
1473 lambda = 0.0;
1474 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1475 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
1476 color);
1477 if (DEBUG_PROG) {
1478 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1479 "lod %f\n",
1480 color[0], color[1], color[2], color[3],
1481 inst->TexSrcUnit,
1482 coord[0], coord[1], coord[2], coord[3], lambda);
1483 }
1484 store_vector4(inst, machine, color);
1485 }
1486 break;
1487 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1488 /* Texel lookup with LOD bias */
1489 {
1490 const struct gl_texture_unit *texUnit
1491 = &ctx->Texture.Unit[inst->TexSrcUnit];
1492 GLfloat coord[4], color[4], lambda, bias;
1493#if 0
1494 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1495 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1496 lambda = span->array->lambda[inst->TexSrcUnit][column];
1497 else
1498#endif
1499 lambda = 0.0;
1500 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1501 /* coord[3] is the bias to add to lambda */
1502 bias = texUnit->LodBias + coord[3];
1503 if (texUnit->_Current)
1504 bias += texUnit->_Current->LodBias;
1505 machine->FetchTexelLod(ctx, coord, lambda + bias,
1506 inst->TexSrcUnit, color);
1507 store_vector4(inst, machine, color);
1508 }
1509 break;
1510 case OPCODE_TXD: /* GL_NV_fragment_program only */
1511 /* Texture lookup w/ partial derivatives for LOD */
1512 {
1513 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1514 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1515 fetch_vector4(ctx, &inst->SrcReg[1], machine, dtdx);
1516 fetch_vector4(ctx, &inst->SrcReg[2], machine, dtdy);
1517 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1518 inst->TexSrcUnit, color);
1519 store_vector4(inst, machine, color);
1520 }
1521 break;
1522 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1523 /* Texture lookup w/ projective divide */
1524 {
1525 GLfloat texcoord[4], color[4], lambda;
1526#if 0
1527 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1528 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1529 lambda = span->array->lambda[inst->TexSrcUnit][column];
1530 else
1531#endif
1532 lambda = 0.0;
1533 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1534 /* Not so sure about this test - if texcoord[3] is
1535 * zero, we'd probably be fine except for an ASSERT in
1536 * IROUND_POS() which gets triggered by the inf values created.
1537 */
1538 if (texcoord[3] != 0.0) {
1539 texcoord[0] /= texcoord[3];
1540 texcoord[1] /= texcoord[3];
1541 texcoord[2] /= texcoord[3];
1542 }
1543 machine->FetchTexelLod(ctx, texcoord, lambda,
1544 inst->TexSrcUnit, color);
1545 store_vector4(inst, machine, color);
1546 }
1547 break;
1548 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1549 /* Texture lookup w/ projective divide */
1550 {
1551 GLfloat texcoord[4], color[4], lambda;
1552#if 0
1553 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1554 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1555 lambda = span->array->lambda[inst->TexSrcUnit][column];
1556 else
1557#endif
1558 lambda = 0.0;
1559 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1560 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1561 texcoord[3] != 0.0) {
1562 texcoord[0] /= texcoord[3];
1563 texcoord[1] /= texcoord[3];
1564 texcoord[2] /= texcoord[3];
1565 }
1566 machine->FetchTexelLod(ctx, texcoord, lambda,
1567 inst->TexSrcUnit, color);
1568 store_vector4(inst, machine, color);
1569 }
1570 break;
1571 case OPCODE_UP2H: /* unpack two 16-bit floats */
1572 {
1573 GLfloat a[4], result[4];
1574 const GLuint *rawBits = (const GLuint *) a;
1575 GLhalfNV hx, hy;
1576 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1577 hx = rawBits[0] & 0xffff;
1578 hy = rawBits[0] >> 16;
1579 result[0] = result[2] = _mesa_half_to_float(hx);
1580 result[1] = result[3] = _mesa_half_to_float(hy);
1581 store_vector4(inst, machine, result);
1582 }
1583 break;
1584 case OPCODE_UP2US: /* unpack two GLushorts */
1585 {
1586 GLfloat a[4], result[4];
1587 const GLuint *rawBits = (const GLuint *) a;
1588 GLushort usx, usy;
1589 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1590 usx = rawBits[0] & 0xffff;
1591 usy = rawBits[0] >> 16;
1592 result[0] = result[2] = usx * (1.0f / 65535.0f);
1593 result[1] = result[3] = usy * (1.0f / 65535.0f);
1594 store_vector4(inst, machine, result);
1595 }
1596 break;
1597 case OPCODE_UP4B: /* unpack four GLbytes */
1598 {
1599 GLfloat a[4], result[4];
1600 const GLuint *rawBits = (const GLuint *) a;
1601 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1602 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1603 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1604 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1605 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1606 store_vector4(inst, machine, result);
1607 }
1608 break;
1609 case OPCODE_UP4UB: /* unpack four GLubytes */
1610 {
1611 GLfloat a[4], result[4];
1612 const GLuint *rawBits = (const GLuint *) a;
1613 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1614 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1615 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1616 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1617 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1618 store_vector4(inst, machine, result);
1619 }
1620 break;
1621 case OPCODE_XPD: /* cross product */
1622 {
1623 GLfloat a[4], b[4], result[4];
1624 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1625 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1626 result[0] = a[1] * b[2] - a[2] * b[1];
1627 result[1] = a[2] * b[0] - a[0] * b[2];
1628 result[2] = a[0] * b[1] - a[1] * b[0];
1629 result[3] = 1.0;
1630 store_vector4(inst, machine, result);
1631 }
1632 break;
1633 case OPCODE_X2D: /* 2-D matrix transform */
1634 {
1635 GLfloat a[4], b[4], c[4], result[4];
1636 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1637 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1638 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1639 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1640 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1641 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1642 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1643 store_vector4(inst, machine, result);
1644 }
1645 break;
1646 case OPCODE_PRINT:
1647 {
1648 if (inst->SrcReg[0].File != -1) {
1649 GLfloat a[4];
1650 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1651 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1652 a[0], a[1], a[2], a[3]);
1653 }
1654 else {
1655 _mesa_printf("%s\n", (const char *) inst->Data);
1656 }
1657 }
1658 break;
1659 case OPCODE_END:
1660 return GL_TRUE;
1661 default:
1662 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1663 inst->Opcode);
1664 return GL_TRUE; /* return value doesn't matter */
Brian13e3b212007-02-22 16:09:40 -07001665
1666 }
Briane80d9012007-02-23 16:53:24 -07001667
Brian13e3b212007-02-22 16:09:40 -07001668 total++;
1669 if (total > MAX_EXEC) {
1670 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1671 return GL_TRUE;
1672 abort();
1673 }
Briane80d9012007-02-23 16:53:24 -07001674
1675 } /* for pc */
Brian13e3b212007-02-22 16:09:40 -07001676
1677#if FEATURE_MESA_program_debug
1678 CurrentMachine = NULL;
1679#endif
1680
1681 return GL_TRUE;
1682}