/*
2 * Mesa 3-D graphics library
3 * Version: 6.5.3
4 *
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31/*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38#include "glheader.h"
39#include "colormac.h"
40#include "context.h"
41#include "program.h"
42#include "prog_execute.h"
43#include "prog_instruction.h"
44#include "prog_parameter.h"
45#include "prog_print.h"
46#include "slang_library_noise.h"
47
48
49/* See comments below for info about this */
50#define LAMBDA_ZERO 1
51
52/* debug predicate */
53#define DEBUG_PROG 0
54
55
#if FEATURE_MESA_program_debug
/**
 * Machine state most recently handed to _mesa_execute_program();
 * NULL until a program has been run.
 */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx - rendering context (only used for error reporting)
 * \param file - register file to read: PROGRAM_INPUT, PROGRAM_OUTPUT or
 *               PROGRAM_TEMPORARY
 * \param index - register index within that file (not range-checked here)
 * \param val - receives the four register components.  Left untouched if
 *              no machine has been recorded yet or \p file is unsupported.
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (!CurrentMachine) {
      /* no program has been executed yet: nothing to report */
      return;
   }

   switch (file) {
   case PROGRAM_INPUT:
      /* vertex and fragment programs keep their inputs in different arrays */
      if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
         COPY_4V(val, CurrentMachine->VertAttribs[index]);
      }
      else {
         COPY_4V(val,
                 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
      }
      break;
   case PROGRAM_OUTPUT:
      COPY_4V(val, CurrentMachine->Outputs[index]);
      break;
   case PROGRAM_TEMPORARY:
      COPY_4V(val, CurrentMachine->Temporaries[index]);
      break;
   default:
      _mesa_problem(ctx,
                    "bad register file in _mesa_get_program_register");
   }
}
#endif /* FEATURE_MESA_program_debug */
92
93
94
95/**
96 * Return a pointer to the 4-element float vector specified by the given
97 * source register.
98 */
99static INLINE const GLfloat *
100get_register_pointer( GLcontext *ctx,
101 const struct prog_src_register *source,
102 const struct gl_program_machine *machine)
103{
104 /* XXX relative addressing... */
105 switch (source->File) {
106 case PROGRAM_TEMPORARY:
107 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
108 return machine->Temporaries[source->Index];
109
110 case PROGRAM_INPUT:
111 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
112 ASSERT(source->Index < VERT_ATTRIB_MAX);
113 return machine->VertAttribs[source->Index];
114 }
115 else {
116 ASSERT(source->Index < FRAG_ATTRIB_MAX);
117 return machine->Attribs[source->Index][machine->CurElement];
118 }
119
120 case PROGRAM_OUTPUT:
121 /* This is only for PRINT */
122 ASSERT(source->Index < FRAG_RESULT_MAX);
123 return machine->Outputs[source->Index];
124
125 case PROGRAM_LOCAL_PARAM:
126 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
127 return machine->CurProgram->LocalParams[source->Index];
128
129 case PROGRAM_ENV_PARAM:
130 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
131 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
132 return ctx->VertexProgram.Parameters[source->Index];
133 else
134 return ctx->FragmentProgram.Parameters[source->Index];
135
136 case PROGRAM_STATE_VAR:
137 /* Fallthrough */
138 case PROGRAM_CONSTANT:
139 /* Fallthrough */
140 case PROGRAM_UNIFORM:
141 /* Fallthrough */
142 case PROGRAM_NAMED_PARAM:
143 ASSERT(source->Index <
144 (GLint) machine->CurProgram->Parameters->NumParameters);
145 return machine->CurProgram->Parameters->ParameterValues[source->Index];
146
147 default:
148 _mesa_problem(ctx,
149 "Invalid input register file %d in get_register_pointer()",
150 source->File);
151 return NULL;
152 }
153}
154
155
156/**
157 * Fetch a 4-element float vector from the given source register.
158 * Apply swizzling and negating as needed.
159 */
160static void
161fetch_vector4( GLcontext *ctx,
162 const struct prog_src_register *source,
163 const struct gl_program_machine *machine,
164 const struct gl_program *program,
165 GLfloat result[4] )
166{
167 const GLfloat *src = get_register_pointer(ctx, source, machine);
168 ASSERT(src);
169
170 if (source->Swizzle == SWIZZLE_NOOP) {
171 /* no swizzling */
172 COPY_4V(result, src);
173 }
174 else {
175 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
176 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
177 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
178 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
179 result[0] = src[GET_SWZ(source->Swizzle, 0)];
180 result[1] = src[GET_SWZ(source->Swizzle, 1)];
181 result[2] = src[GET_SWZ(source->Swizzle, 2)];
182 result[3] = src[GET_SWZ(source->Swizzle, 3)];
183 }
184
185 if (source->NegateBase) {
186 result[0] = -result[0];
187 result[1] = -result[1];
188 result[2] = -result[2];
189 result[3] = -result[3];
190 }
191 if (source->Abs) {
192 result[0] = FABSF(result[0]);
193 result[1] = FABSF(result[1]);
194 result[2] = FABSF(result[2]);
195 result[3] = FABSF(result[3]);
196 }
197 if (source->NegateAbs) {
198 result[0] = -result[0];
199 result[1] = -result[1];
200 result[2] = -result[2];
201 result[3] = -result[3];
202 }
203}
204
#if 0
/**
 * Fetch the partial derivative, with respect to X or Y, of the fragment
 * input selected by source->Index, using the span's per-attribute steps.
 * The register's swizzle, NegateBase, Abs and NegateAbs modifiers are then
 * applied to the derivative.
 *
 * \param xOrY - 'X' for d/dx; any other value selects the Y steps
 * \param column - fragment position within the span (only used for the
 *                 X derivative of texcoords/varyings)
 * \param result - receives the derivative
 * \return GL_TRUE if it was easily computed or GL_FALSE if we
 *         need to execute another instance of the program (ugh)!
 *         This implementation always returns GL_TRUE.
 */
static GLboolean
fetch_vector4_deriv( GLcontext *ctx,
                     const struct prog_src_register *source,
                     const SWspan *span,
                     char xOrY, GLint column, GLfloat result[4] )
{
   const GLboolean wrtX = (xOrY == 'X');
   GLfloat deriv[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      {
         const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_WPOS]
                                    : span->attrStepY[FRAG_ATTRIB_WPOS];
         /* window x/y change by exactly one along their own axis */
         deriv[0] = wrtX ? 1.0 : 0.0;
         deriv[1] = wrtX ? 0.0 : 1.0;
         deriv[2] = step[2] / ctx->DrawBuffer->_DepthMaxF;
         deriv[3] = step[3];
      }
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      {
         const GLfloat *step = wrtX ? span->attrStepX[source->Index]
                                    : span->attrStepY[source->Index];
         for (i = 0; i < 4; i++) {
            deriv[i] = step[i] * (1.0F / CHAN_MAXF);
         }
      }
      break;
   case FRAG_ATTRIB_FOGC:
      {
         const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_FOGC]
                                    : span->attrStepY[FRAG_ATTRIB_FOGC];
         /* only the first component carries a fog derivative */
         deriv[0] = step[0] * (1.0F / CHAN_MAXF);
         deriv[1] = 0.0;
         deriv[2] = 0.0;
         deriv[3] = 0.0;
      }
      break;
   default:
      {
         /* texcoord or varying */
         const GLfloat *step;
         GLfloat invQ;

         assert(source->Index < FRAG_ATTRIB_MAX);
         if (wrtX) {
            /* this is a little tricky - I think I've got it right */
            step = span->attrStepX[source->Index];
            invQ = 1.0f / (span->attrStart[source->Index][3]
                           + step[3] * column);
         }
         else {
            /* Tricky, as above, but in Y direction (column is not used) */
            step = span->attrStepY[source->Index];
            invQ = 1.0f / (span->attrStart[source->Index][3]
                           + step[3]);
         }
         for (i = 0; i < 4; i++) {
            deriv[i] = step[i] * invQ;
         }
      }
      break;
   }

   /* swizzle, then NegateBase / Abs / NegateAbs per component */
   for (i = 0; i < 4; i++) {
      GLfloat v = deriv[GET_SWZ(source->Swizzle, i)];
      if (source->NegateBase) {
         v = -v;
      }
      if (source->Abs) {
         v = FABSF(v);
      }
      if (source->NegateAbs) {
         v = -v;
      }
      result[i] = v;
   }
   return GL_TRUE;
}
#endif
317
318
319/**
320 * As above, but only return result[0] element.
321 */
322static void
323fetch_vector1( GLcontext *ctx,
324 const struct prog_src_register *source,
325 const struct gl_program_machine *machine,
326 const struct gl_program *program,
327 GLfloat result[4] )
328{
329 const GLfloat *src = get_register_pointer(ctx, source, machine);
330 ASSERT(src);
331
332 result[0] = src[GET_SWZ(source->Swizzle, 0)];
333
334 if (source->NegateBase) {
335 result[0] = -result[0];
336 }
337 if (source->Abs) {
338 result[0] = FABSF(result[0]);
339 }
340 if (source->NegateAbs) {
341 result[0] = -result[0];
342 }
343}
344
345
346/**
347 * Test value against zero and return GT, LT, EQ or UN if NaN.
348 */
349static INLINE GLuint
350generate_cc( float value )
351{
352 if (value != value)
353 return COND_UN; /* NaN */
354 if (value > 0.0F)
355 return COND_GT;
356 if (value < 0.0F)
357 return COND_LT;
358 return COND_EQ;
359}
360
361
362/**
363 * Test if the ccMaskRule is satisfied by the given condition code.
364 * Used to mask destination writes according to the current condition code.
365 */
366static INLINE GLboolean
367test_cc(GLuint condCode, GLuint ccMaskRule)
368{
369 switch (ccMaskRule) {
370 case COND_EQ: return (condCode == COND_EQ);
371 case COND_NE: return (condCode != COND_EQ);
372 case COND_LT: return (condCode == COND_LT);
373 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
374 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
375 case COND_GT: return (condCode == COND_GT);
376 case COND_TR: return GL_TRUE;
377 case COND_FL: return GL_FALSE;
378 default: return GL_TRUE;
379 }
380}
381
382
383/**
384 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
385 * or GL_FALSE to indicate result.
386 */
387static INLINE GLboolean
388eval_condition(const struct gl_program_machine *machine,
389 const struct prog_instruction *inst)
390{
391 const GLuint swizzle = inst->DstReg.CondSwizzle;
392 const GLuint condMask = inst->DstReg.CondMask;
393 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
394 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
395 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
396 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
397 return GL_TRUE;
398 }
399 else {
400 return GL_FALSE;
401 }
402}
403
404
405
406/**
407 * Store 4 floats into a register. Observe the instructions saturate and
408 * set-condition-code flags.
409 */
410static void
411store_vector4( const struct prog_instruction *inst,
412 struct gl_program_machine *machine,
413 const GLfloat value[4] )
414{
415 const struct prog_dst_register *dest = &(inst->DstReg);
416 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
417 GLfloat *dstReg;
418 GLfloat dummyReg[4];
419 GLfloat clampedValue[4];
420 GLuint writeMask = dest->WriteMask;
421
422 switch (dest->File) {
423 case PROGRAM_OUTPUT:
424 dstReg = machine->Outputs[dest->Index];
425 break;
426 case PROGRAM_TEMPORARY:
427 dstReg = machine->Temporaries[dest->Index];
428 break;
429 case PROGRAM_WRITE_ONLY:
430 dstReg = dummyReg;
431 return;
432 default:
433 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
434 return;
435 }
436
437#if 0
438 if (value[0] > 1.0e10 ||
439 IS_INF_OR_NAN(value[0]) ||
440 IS_INF_OR_NAN(value[1]) ||
441 IS_INF_OR_NAN(value[2]) ||
442 IS_INF_OR_NAN(value[3]) )
443 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
444#endif
445
446 if (clamp) {
447 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
448 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
449 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
450 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
451 value = clampedValue;
452 }
453
454 if (dest->CondMask != COND_TR) {
455 /* condition codes may turn off some writes */
456 if (writeMask & WRITEMASK_X) {
457 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
458 dest->CondMask))
459 writeMask &= ~WRITEMASK_X;
460 }
461 if (writeMask & WRITEMASK_Y) {
462 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
463 dest->CondMask))
464 writeMask &= ~WRITEMASK_Y;
465 }
466 if (writeMask & WRITEMASK_Z) {
467 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
468 dest->CondMask))
469 writeMask &= ~WRITEMASK_Z;
470 }
471 if (writeMask & WRITEMASK_W) {
472 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
473 dest->CondMask))
474 writeMask &= ~WRITEMASK_W;
475 }
476 }
477
478 if (writeMask & WRITEMASK_X)
479 dstReg[0] = value[0];
480 if (writeMask & WRITEMASK_Y)
481 dstReg[1] = value[1];
482 if (writeMask & WRITEMASK_Z)
483 dstReg[2] = value[2];
484 if (writeMask & WRITEMASK_W)
485 dstReg[3] = value[3];
486
487 if (inst->CondUpdate) {
488 if (writeMask & WRITEMASK_X)
489 machine->CondCodes[0] = generate_cc(value[0]);
490 if (writeMask & WRITEMASK_Y)
491 machine->CondCodes[1] = generate_cc(value[1]);
492 if (writeMask & WRITEMASK_Z)
493 machine->CondCodes[2] = generate_cc(value[2]);
494 if (writeMask & WRITEMASK_W)
495 machine->CondCodes[3] = generate_cc(value[3]);
496 }
497}
498
499
#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * All modifications are made through \p dMachine; the source \p machine
 * is only read.
 * NOTE(review): if the Attribs member refers to storage shared with the
 * source machine rather than embedded storage, the offsets added below
 * still land in that shared storage - confirm before re-enabling this code.
 *
 * \param ctx - rendering context
 * \param machine - machine state to copy from
 * \param program - fragment program being executed
 * \param span - span supplying the per-attribute X/Y steps
 * \param xOrY - 'X' to step in X; any other value steps in Y
 * \param dMachine - receives the new machine state
 */
static void
init_machine_deriv( GLcontext *ctx,
                    const struct gl_program_machine *machine,
                    const struct gl_fragment_program *program,
                    const SWspan *span, char xOrY,
                    struct gl_program_machine *dMachine )
{
   const GLboolean wrtX = (xOrY == 'X');
   GLuint attr;
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero( (void*) dMachine->Temporaries,
                   MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_WPOS]
                                 : span->attrStepY[FRAG_ATTRIB_WPOS];
      /* window x/y advance by exactly one along their own axis */
      wpos[0] += wrtX ? 1.0F : 0.0F;
      wpos[1] += wrtX ? 0.0F : 1.0F;
      wpos[2] += step[2];
      wpos[3] += step[3];
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         const GLfloat *step = wrtX ? span->attrStepX[attr]
                                    : span->attrStepY[attr];
         for (i = 0; i < 4; i++) {
            col[i] += step[i] * (1.0F / CHAN_MAXF);
         }
      }
   }

   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      fogc[0] += wrtX ? span->attrStepX[FRAG_ATTRIB_FOGC][0]
                      : span->attrStepY[FRAG_ATTRIB_FOGC][0];
   }

   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         const GLfloat *step = wrtX ? span->attrStepX[attr]
                                    : span->attrStepY[attr];
         /* XXX perspective-correct interpolation */
         for (i = 0; i < 4; i++) {
            val[i] += step[i];
         }
      }
   }

   /* init condition codes */
   for (i = 0; i < 4; i++) {
      dMachine->CondCodes[i] = COND_EQ;
   }
}
#endif
598
599
600/**
601 * Execute the given vertex/fragment program.
602 *
603 * \param ctx - rendering context
604 * \param program - the fragment program to execute
605 * \param machine - machine state (register file)
606 * \param maxInst - max number of instructions to execute
607 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
608 */
609GLboolean
610_mesa_execute_program(GLcontext *ctx,
611 const struct gl_program *program, GLuint maxInst,
612 struct gl_program_machine *machine, GLuint element)
613{
614 const GLuint MAX_EXEC = 10000;
615 GLint pc, total = 0;
616
617 machine->CurProgram = program;
618
619 if (DEBUG_PROG) {
620 printf("execute program %u --------------------\n", program->Id);
621 }
622
623#if FEATURE_MESA_program_debug
624 CurrentMachine = machine;
625#endif
626
627 for (pc = 0; pc < maxInst; pc++) {
628 const struct prog_instruction *inst = program->Instructions + pc;
629
630#if FEATURE_MESA_program_debug
631 if (ctx->FragmentProgram.CallbackEnabled &&
632 ctx->FragmentProgram.Callback) {
633 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
634 ctx->FragmentProgram.Callback(program->Target,
635 ctx->FragmentProgram.CallbackData);
636 }
637#endif
638
639 if (DEBUG_PROG) {
640 _mesa_print_instruction(inst);
641 }
642
643 switch (inst->Opcode) {
644 case OPCODE_ABS:
645 {
646 GLfloat a[4], result[4];
647 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
648 result[0] = FABSF(a[0]);
649 result[1] = FABSF(a[1]);
650 result[2] = FABSF(a[2]);
651 result[3] = FABSF(a[3]);
652 store_vector4( inst, machine, result );
653 }
654 break;
655 case OPCODE_ADD:
656 {
657 GLfloat a[4], b[4], result[4];
658 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
659 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
660 result[0] = a[0] + b[0];
661 result[1] = a[1] + b[1];
662 result[2] = a[2] + b[2];
663 result[3] = a[3] + b[3];
664 store_vector4( inst, machine, result );
665 if (DEBUG_PROG) {
666 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
667 result[0], result[1], result[2], result[3],
668 a[0], a[1], a[2], a[3],
669 b[0], b[1], b[2], b[3]);
670 }
671 }
672 break;
673 case OPCODE_BGNLOOP:
674 /* no-op */
675 break;
676 case OPCODE_ENDLOOP:
677 /* subtract 1 here since pc is incremented by for(pc) loop */
678 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
679 break;
680 case OPCODE_BGNSUB: /* begin subroutine */
681 break;
682 case OPCODE_ENDSUB: /* end subroutine */
683 break;
684 case OPCODE_BRA: /* branch (conditional) */
685 /* fall-through */
686 case OPCODE_BRK: /* break out of loop (conditional) */
687 /* fall-through */
688 case OPCODE_CONT: /* continue loop (conditional) */
689 if (eval_condition(machine, inst)) {
690 /* take branch */
691 /* Subtract 1 here since we'll do pc++ at end of for-loop */
692 pc = inst->BranchTarget - 1;
693 }
694 break;
695 case OPCODE_CAL: /* Call subroutine (conditional) */
696 if (eval_condition(machine, inst)) {
697 /* call the subroutine */
698 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
699 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
700 }
701 machine->CallStack[machine->StackDepth++] = pc + 1;
702 pc = inst->BranchTarget; /* XXX - 1 ??? */
703 }
704 break;
705 case OPCODE_CMP:
706 {
707 GLfloat a[4], b[4], c[4], result[4];
708 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
709 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
710 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
711 result[0] = a[0] < 0.0F ? b[0] : c[0];
712 result[1] = a[1] < 0.0F ? b[1] : c[1];
713 result[2] = a[2] < 0.0F ? b[2] : c[2];
714 result[3] = a[3] < 0.0F ? b[3] : c[3];
715 store_vector4( inst, machine, result );
716 }
717 break;
718 case OPCODE_COS:
719 {
720 GLfloat a[4], result[4];
721 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
722 result[0] = result[1] = result[2] = result[3]
723 = (GLfloat) _mesa_cos(a[0]);
724 store_vector4( inst, machine, result );
725 }
726 break;
727 case OPCODE_DDX: /* Partial derivative with respect to X */
728 {
729#if 0
730 GLfloat a[4], aNext[4], result[4];
731 struct gl_program_machine dMachine;
732 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
733 column, result)) {
734 /* This is tricky. Make a copy of the current machine state,
735 * increment the input registers by the dx or dy partial
736 * derivatives, then re-execute the program up to the
737 * preceeding instruction, then fetch the source register.
738 * Finally, find the difference in the register values for
739 * the original and derivative runs.
740 */
741 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
742 init_machine_deriv(ctx, machine, program, span,
743 'X', &dMachine);
744 execute_program(ctx, program, pc, &dMachine, span, column);
745 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
746 result[0] = aNext[0] - a[0];
747 result[1] = aNext[1] - a[1];
748 result[2] = aNext[2] - a[2];
749 result[3] = aNext[3] - a[3];
750 }
751 store_vector4( inst, machine, result );
752#else
753 static const GLfloat result[4] = { 0, 0, 0, 0 };
754 store_vector4( inst, machine, result );
755#endif
756 }
757 break;
758 case OPCODE_DDY: /* Partial derivative with respect to Y */
759 {
760#if 0
761 GLfloat a[4], aNext[4], result[4];
762 struct gl_program_machine dMachine;
763 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
764 column, result)) {
765 init_machine_deriv(ctx, machine, program, span,
766 'Y', &dMachine);
767 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
768 execute_program(ctx, program, pc, &dMachine, span, column);
769 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
770 result[0] = aNext[0] - a[0];
771 result[1] = aNext[1] - a[1];
772 result[2] = aNext[2] - a[2];
773 result[3] = aNext[3] - a[3];
774 }
775 store_vector4( inst, machine, result );
776#else
777 static const GLfloat result[4] = { 0, 0, 0, 0 };
778 store_vector4( inst, machine, result );
779#endif
780 }
781 break;
782 case OPCODE_DP3:
783 {
784 GLfloat a[4], b[4], result[4];
785 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
786 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
787 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
788 store_vector4( inst, machine, result );
789 if (DEBUG_PROG) {
790 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
791 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
792 }
793 }
794 break;
795 case OPCODE_DP4:
796 {
797 GLfloat a[4], b[4], result[4];
798 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
799 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
800 result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
801 store_vector4( inst, machine, result );
802 if (DEBUG_PROG) {
803 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
804 result[0], a[0], a[1], a[2], a[3],
805 b[0], b[1], b[2], b[3]);
806 }
807 }
808 break;
809 case OPCODE_DPH:
810 {
811 GLfloat a[4], b[4], result[4];
812 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
813 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
814 result[0] = result[1] = result[2] = result[3] =
815 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
816 store_vector4( inst, machine, result );
817 }
818 break;
819 case OPCODE_DST: /* Distance vector */
820 {
821 GLfloat a[4], b[4], result[4];
822 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
823 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
824 result[0] = 1.0F;
825 result[1] = a[1] * b[1];
826 result[2] = a[2];
827 result[3] = b[3];
828 store_vector4( inst, machine, result );
829 }
830 break;
831 case OPCODE_EX2: /* Exponential base 2 */
832 {
833 GLfloat a[4], result[4];
834 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
835 result[0] = result[1] = result[2] = result[3] =
836 (GLfloat) _mesa_pow(2.0, a[0]);
837 store_vector4( inst, machine, result );
838 }
839 break;
840 case OPCODE_FLR:
841 {
842 GLfloat a[4], result[4];
843 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
844 result[0] = FLOORF(a[0]);
845 result[1] = FLOORF(a[1]);
846 result[2] = FLOORF(a[2]);
847 result[3] = FLOORF(a[3]);
848 store_vector4( inst, machine, result );
849 }
850 break;
851 case OPCODE_FRC:
852 {
853 GLfloat a[4], result[4];
854 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
855 result[0] = a[0] - FLOORF(a[0]);
856 result[1] = a[1] - FLOORF(a[1]);
857 result[2] = a[2] - FLOORF(a[2]);
858 result[3] = a[3] - FLOORF(a[3]);
859 store_vector4( inst, machine, result );
860 }
861 break;
862 case OPCODE_IF:
863 if (eval_condition(machine, inst)) {
864 /* do if-clause (just continue execution) */
865 }
866 else {
867 /* go to the instruction after ELSE or ENDIF */
868 assert(inst->BranchTarget >= 0);
869 pc = inst->BranchTarget - 1;
870 }
871 break;
872 case OPCODE_ELSE:
873 /* goto ENDIF */
874 assert(inst->BranchTarget >= 0);
875 pc = inst->BranchTarget - 1;
876 break;
877 case OPCODE_ENDIF:
878 /* nothing */
879 break;
880 case OPCODE_INT: /* float to int */
881 {
882 GLfloat a[4], result[4];
883 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
884 result[0] = (GLfloat) (GLint) a[0];
885 result[1] = (GLfloat) (GLint) a[1];
886 result[2] = (GLfloat) (GLint) a[2];
887 result[3] = (GLfloat) (GLint) a[3];
888 store_vector4( inst, machine, result );
889 }
890 break;
891 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
892 if (eval_condition(machine, inst)) {
893 return GL_FALSE;
894 }
895 break;
896 case OPCODE_KIL: /* ARB_f_p only */
897 {
898 GLfloat a[4];
899 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
900 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
901 return GL_FALSE;
902 }
903 }
904 break;
905 case OPCODE_LG2: /* log base 2 */
906 {
907 GLfloat a[4], result[4];
908 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
909 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
910 store_vector4( inst, machine, result );
911 }
912 break;
913 case OPCODE_LIT:
914 {
915 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
916 GLfloat a[4], result[4];
917 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
918 a[0] = MAX2(a[0], 0.0F);
919 a[1] = MAX2(a[1], 0.0F);
920 /* XXX ARB version clamps a[3], NV version doesn't */
921 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
922 result[0] = 1.0F;
923 result[1] = a[0];
924 /* XXX we could probably just use pow() here */
925 if (a[0] > 0.0F) {
926 if (a[1] == 0.0 && a[3] == 0.0)
927 result[2] = 1.0;
928 else
929 result[2] = EXPF(a[3] * LOGF(a[1]));
930 }
931 else {
932 result[2] = 0.0;
933 }
934 result[3] = 1.0F;
935 store_vector4( inst, machine, result );
936 if (DEBUG_PROG) {
937 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
938 result[0], result[1], result[2], result[3],
939 a[0], a[1], a[2], a[3]);
940 }
941 }
942 break;
943 case OPCODE_LRP:
944 {
945 GLfloat a[4], b[4], c[4], result[4];
946 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
947 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
948 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
949 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
950 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
951 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
952 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
953 store_vector4( inst, machine, result );
954 if (DEBUG_PROG) {
955 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
956 "(%g %g %g %g), (%g %g %g %g)\n",
957 result[0], result[1], result[2], result[3],
958 a[0], a[1], a[2], a[3],
959 b[0], b[1], b[2], b[3],
960 c[0], c[1], c[2], c[3]);
961 }
962 }
963 break;
964 case OPCODE_MAD:
965 {
966 GLfloat a[4], b[4], c[4], result[4];
967 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
968 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
969 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
970 result[0] = a[0] * b[0] + c[0];
971 result[1] = a[1] * b[1] + c[1];
972 result[2] = a[2] * b[2] + c[2];
973 result[3] = a[3] * b[3] + c[3];
974 store_vector4( inst, machine, result );
975 if (DEBUG_PROG) {
976 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
977 "(%g %g %g %g) + (%g %g %g %g)\n",
978 result[0], result[1], result[2], result[3],
979 a[0], a[1], a[2], a[3],
980 b[0], b[1], b[2], b[3],
981 c[0], c[1], c[2], c[3]);
982 }
983 }
984 break;
985 case OPCODE_MAX:
986 {
987 GLfloat a[4], b[4], result[4];
988 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
989 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
990 result[0] = MAX2(a[0], b[0]);
991 result[1] = MAX2(a[1], b[1]);
992 result[2] = MAX2(a[2], b[2]);
993 result[3] = MAX2(a[3], b[3]);
994 store_vector4( inst, machine, result );
995 if (DEBUG_PROG) {
996 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
997 result[0], result[1], result[2], result[3],
998 a[0], a[1], a[2], a[3],
999 b[0], b[1], b[2], b[3]);
1000 }
1001 }
1002 break;
1003 case OPCODE_MIN:
1004 {
1005 GLfloat a[4], b[4], result[4];
1006 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1007 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1008 result[0] = MIN2(a[0], b[0]);
1009 result[1] = MIN2(a[1], b[1]);
1010 result[2] = MIN2(a[2], b[2]);
1011 result[3] = MIN2(a[3], b[3]);
1012 store_vector4( inst, machine, result );
1013 }
1014 break;
1015 case OPCODE_MOV:
1016 {
1017 GLfloat result[4];
1018 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
1019 store_vector4( inst, machine, result );
1020 if (DEBUG_PROG) {
1021 printf("MOV (%g %g %g %g)\n",
1022 result[0], result[1], result[2], result[3]);
1023 }
1024 }
1025 break;
1026 case OPCODE_MUL:
1027 {
1028 GLfloat a[4], b[4], result[4];
1029 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1030 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1031 result[0] = a[0] * b[0];
1032 result[1] = a[1] * b[1];
1033 result[2] = a[2] * b[2];
1034 result[3] = a[3] * b[3];
1035 store_vector4( inst, machine, result );
1036 if (DEBUG_PROG) {
1037 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1038 result[0], result[1], result[2], result[3],
1039 a[0], a[1], a[2], a[3],
1040 b[0], b[1], b[2], b[3]);
1041 }
1042 }
1043 break;
1044 case OPCODE_NOISE1:
1045 {
1046 GLfloat a[4], result[4];
1047 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1048 result[0] =
1049 result[1] =
1050 result[2] =
1051 result[3] = _slang_library_noise1(a[0]);
1052 store_vector4( inst, machine, result );
1053 }
1054 break;
1055 case OPCODE_NOISE2:
1056 {
1057 GLfloat a[4], result[4];
1058 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1059 result[0] =
1060 result[1] =
1061 result[2] =
1062 result[3] = _slang_library_noise2(a[0], a[1]);
1063 store_vector4( inst, machine, result );
1064 }
1065 break;
1066 case OPCODE_NOISE3:
1067 {
1068 GLfloat a[4], result[4];
1069 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1070 result[0] =
1071 result[1] =
1072 result[2] =
1073 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1074 store_vector4( inst, machine, result );
1075 }
1076 break;
1077 case OPCODE_NOISE4:
1078 {
1079 GLfloat a[4], result[4];
1080 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1081 result[0] =
1082 result[1] =
1083 result[2] =
1084 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1085 store_vector4( inst, machine, result );
1086 }
1087 break;
1088 case OPCODE_NOP:
1089 break;
1090 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1091 {
1092 GLfloat a[4], result[4];
1093 GLhalfNV hx, hy;
1094 GLuint *rawResult = (GLuint *) result;
1095 GLuint twoHalves;
1096 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1097 hx = _mesa_float_to_half(a[0]);
1098 hy = _mesa_float_to_half(a[1]);
1099 twoHalves = hx | (hy << 16);
1100 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1101 = twoHalves;
1102 store_vector4( inst, machine, result );
1103 }
1104 break;
1105 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1106 {
1107 GLfloat a[4], result[4];
1108 GLuint usx, usy, *rawResult = (GLuint *) result;
1109 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1110 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1111 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1112 usx = IROUND(a[0] * 65535.0F);
1113 usy = IROUND(a[1] * 65535.0F);
1114 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1115 = usx | (usy << 16);
1116 store_vector4( inst, machine, result );
1117 }
1118 break;
1119 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1120 {
1121 GLfloat a[4], result[4];
1122 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1123 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1124 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1125 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1126 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1127 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1128 ubx = IROUND(127.0F * a[0] + 128.0F);
1129 uby = IROUND(127.0F * a[1] + 128.0F);
1130 ubz = IROUND(127.0F * a[2] + 128.0F);
1131 ubw = IROUND(127.0F * a[3] + 128.0F);
1132 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1133 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1134 store_vector4( inst, machine, result );
1135 }
1136 break;
1137 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1138 {
1139 GLfloat a[4], result[4];
1140 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1141 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1142 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1143 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1144 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1145 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1146 ubx = IROUND(255.0F * a[0]);
1147 uby = IROUND(255.0F * a[1]);
1148 ubz = IROUND(255.0F * a[2]);
1149 ubw = IROUND(255.0F * a[3]);
1150 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1151 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1152 store_vector4( inst, machine, result );
1153 }
1154 break;
1155 case OPCODE_POW:
1156 {
1157 GLfloat a[4], b[4], result[4];
1158 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1159 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1160 result[0] = result[1] = result[2] = result[3]
1161 = (GLfloat)_mesa_pow(a[0], b[0]);
1162 store_vector4( inst, machine, result );
1163 }
1164 break;
1165 case OPCODE_RCP:
1166 {
1167 GLfloat a[4], result[4];
1168 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1169 if (DEBUG_PROG) {
1170 if (a[0] == 0)
1171 printf("RCP(0)\n");
1172 else if (IS_INF_OR_NAN(a[0]))
1173 printf("RCP(inf)\n");
1174 }
1175 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1176 store_vector4( inst, machine, result );
1177 }
1178 break;
1179 case OPCODE_RET: /* return from subroutine (conditional) */
1180 if (eval_condition(machine, inst)) {
1181 if (machine->StackDepth == 0) {
1182 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1183 }
1184 pc = machine->CallStack[--machine->StackDepth];
1185 }
1186 break;
1187 case OPCODE_RFL: /* reflection vector */
1188 {
1189 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1190 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1191 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1192 tmpW = DOT3(axis, axis);
1193 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1194 result[0] = tmpX * axis[0] - dir[0];
1195 result[1] = tmpX * axis[1] - dir[1];
1196 result[2] = tmpX * axis[2] - dir[2];
1197 /* result[3] is never written! XXX enforce in parser! */
1198 store_vector4( inst, machine, result );
1199 }
1200 break;
1201 case OPCODE_RSQ: /* 1 / sqrt() */
1202 {
1203 GLfloat a[4], result[4];
1204 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1205 a[0] = FABSF(a[0]);
1206 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1207 store_vector4( inst, machine, result );
1208 if (DEBUG_PROG) {
1209 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1210 }
1211 }
1212 break;
1213 case OPCODE_SCS: /* sine and cos */
1214 {
1215 GLfloat a[4], result[4];
1216 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1217 result[0] = (GLfloat) _mesa_cos(a[0]);
1218 result[1] = (GLfloat) _mesa_sin(a[0]);
1219 result[2] = 0.0; /* undefined! */
1220 result[3] = 0.0; /* undefined! */
1221 store_vector4( inst, machine, result );
1222 }
1223 break;
1224 case OPCODE_SEQ: /* set on equal */
1225 {
1226 GLfloat a[4], b[4], result[4];
1227 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1228 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1229 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1230 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1231 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1232 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1233 store_vector4( inst, machine, result );
1234 }
1235 break;
1236 case OPCODE_SFL: /* set false, operands ignored */
1237 {
1238 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1239 store_vector4( inst, machine, result );
1240 }
1241 break;
1242 case OPCODE_SGE: /* set on greater or equal */
1243 {
1244 GLfloat a[4], b[4], result[4];
1245 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1246 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1247 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1248 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1249 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1250 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1251 store_vector4( inst, machine, result );
1252 }
1253 break;
1254 case OPCODE_SGT: /* set on greater */
1255 {
1256 GLfloat a[4], b[4], result[4];
1257 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1258 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1259 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1260 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1261 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1262 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1263 store_vector4( inst, machine, result );
1264 if (DEBUG_PROG) {
1265 printf("SGT %g %g %g %g\n",
1266 result[0], result[1], result[2], result[3]);
1267 }
1268 }
1269 break;
1270 case OPCODE_SIN:
1271 {
1272 GLfloat a[4], result[4];
1273 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1274 result[0] = result[1] = result[2] = result[3]
1275 = (GLfloat) _mesa_sin(a[0]);
1276 store_vector4( inst, machine, result );
1277 }
1278 break;
1279 case OPCODE_SLE: /* set on less or equal */
1280 {
1281 GLfloat a[4], b[4], result[4];
1282 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1283 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1284 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1285 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1286 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1287 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1288 store_vector4( inst, machine, result );
1289 }
1290 break;
1291 case OPCODE_SLT: /* set on less */
1292 {
1293 GLfloat a[4], b[4], result[4];
1294 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1295 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1296 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1297 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1298 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1299 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1300 store_vector4( inst, machine, result );
1301 }
1302 break;
1303 case OPCODE_SNE: /* set on not equal */
1304 {
1305 GLfloat a[4], b[4], result[4];
1306 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1307 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1308 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1309 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1310 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1311 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1312 store_vector4( inst, machine, result );
1313 }
1314 break;
1315 case OPCODE_STR: /* set true, operands ignored */
1316 {
1317 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1318 store_vector4( inst, machine, result );
1319 }
1320 break;
1321 case OPCODE_SUB:
1322 {
1323 GLfloat a[4], b[4], result[4];
1324 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1325 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1326 result[0] = a[0] - b[0];
1327 result[1] = a[1] - b[1];
1328 result[2] = a[2] - b[2];
1329 result[3] = a[3] - b[3];
1330 store_vector4( inst, machine, result );
1331 if (DEBUG_PROG) {
1332 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1333 result[0], result[1], result[2], result[3],
1334 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1335 }
1336 }
1337 break;
1338 case OPCODE_SWZ: /* extended swizzle */
1339 {
1340 const struct prog_src_register *source = &inst->SrcReg[0];
1341 const GLfloat *src = get_register_pointer(ctx, source, machine);
1342 GLfloat result[4];
1343 GLuint i;
1344 for (i = 0; i < 4; i++) {
1345 const GLuint swz = GET_SWZ(source->Swizzle, i);
1346 if (swz == SWIZZLE_ZERO)
1347 result[i] = 0.0;
1348 else if (swz == SWIZZLE_ONE)
1349 result[i] = 1.0;
1350 else {
1351 ASSERT(swz >= 0);
1352 ASSERT(swz <= 3);
1353 result[i] = src[swz];
1354 }
1355 if (source->NegateBase & (1 << i))
1356 result[i] = -result[i];
1357 }
1358 store_vector4( inst, machine, result );
1359 }
1360 break;
1361 case OPCODE_TEX: /* Both ARB and NV frag prog */
1362 /* Texel lookup */
1363 {
1364 /* Note: only use the precomputed lambda value when we're
1365 * sampling texture unit [K] with texcoord[K].
1366 * Otherwise, the lambda value may have no relation to the
1367 * instruction's texcoord or texture image. Using the wrong
1368 * lambda is usually bad news.
1369 * The rest of the time, just use zero (until we get a more
1370 * sophisticated way of computing lambda).
1371 */
1372 GLfloat coord[4], color[4], lambda;
1373#if 0
1374 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1375 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1376 lambda = span->array->lambda[inst->TexSrcUnit][column];
1377 else
1378#endif
1379 lambda = 0.0;
1380 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1381 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit, color);
1382 if (DEBUG_PROG) {
1383 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1384 "lod %f\n",
1385 color[0], color[1], color[2], color[3],
1386 inst->TexSrcUnit,
1387 coord[0], coord[1], coord[2], coord[3], lambda);
1388 }
1389 store_vector4( inst, machine, color );
1390 }
1391 break;
1392 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1393 /* Texel lookup with LOD bias */
1394 {
1395 const struct gl_texture_unit *texUnit
1396 = &ctx->Texture.Unit[inst->TexSrcUnit];
1397 GLfloat coord[4], color[4], lambda, bias;
1398#if 0
1399 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1400 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1401 lambda = span->array->lambda[inst->TexSrcUnit][column];
1402 else
1403#endif
1404 lambda = 0.0;
1405 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1406 /* coord[3] is the bias to add to lambda */
1407 bias = texUnit->LodBias + coord[3];
1408 if (texUnit->_Current)
1409 bias += texUnit->_Current->LodBias;
1410 machine->FetchTexelLod(ctx, coord, lambda + bias,
1411 inst->TexSrcUnit, color);
1412 store_vector4( inst, machine, color );
1413 }
1414 break;
1415 case OPCODE_TXD: /* GL_NV_fragment_program only */
1416 /* Texture lookup w/ partial derivatives for LOD */
1417 {
1418 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1419 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1420 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1421 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1422 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1423 inst->TexSrcUnit, color );
1424 store_vector4( inst, machine, color );
1425 }
1426 break;
1427 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1428 /* Texture lookup w/ projective divide */
1429 {
1430 GLfloat texcoord[4], color[4], lambda;
1431#if 0
1432 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1433 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1434 lambda = span->array->lambda[inst->TexSrcUnit][column];
1435 else
1436#endif
1437 lambda = 0.0;
1438 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1439 /* Not so sure about this test - if texcoord[3] is
1440 * zero, we'd probably be fine except for an ASSERT in
1441 * IROUND_POS() which gets triggered by the inf values created.
1442 */
1443 if (texcoord[3] != 0.0) {
1444 texcoord[0] /= texcoord[3];
1445 texcoord[1] /= texcoord[3];
1446 texcoord[2] /= texcoord[3];
1447 }
1448 machine->FetchTexelLod(ctx, texcoord, lambda,
1449 inst->TexSrcUnit, color);
1450 store_vector4( inst, machine, color );
1451 }
1452 break;
1453 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1454 /* Texture lookup w/ projective divide */
1455 {
1456 GLfloat texcoord[4], color[4], lambda;
1457#if 0
1458 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1459 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1460 lambda = span->array->lambda[inst->TexSrcUnit][column];
1461 else
1462#endif
1463 lambda = 0.0;
1464 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1465 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1466 texcoord[3] != 0.0) {
1467 texcoord[0] /= texcoord[3];
1468 texcoord[1] /= texcoord[3];
1469 texcoord[2] /= texcoord[3];
1470 }
1471 machine->FetchTexelLod(ctx, texcoord, lambda,
1472 inst->TexSrcUnit, color);
1473 store_vector4( inst, machine, color );
1474 }
1475 break;
1476 case OPCODE_UP2H: /* unpack two 16-bit floats */
1477 {
1478 GLfloat a[4], result[4];
1479 const GLuint *rawBits = (const GLuint *) a;
1480 GLhalfNV hx, hy;
1481 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1482 hx = rawBits[0] & 0xffff;
1483 hy = rawBits[0] >> 16;
1484 result[0] = result[2] = _mesa_half_to_float(hx);
1485 result[1] = result[3] = _mesa_half_to_float(hy);
1486 store_vector4( inst, machine, result );
1487 }
1488 break;
1489 case OPCODE_UP2US: /* unpack two GLushorts */
1490 {
1491 GLfloat a[4], result[4];
1492 const GLuint *rawBits = (const GLuint *) a;
1493 GLushort usx, usy;
1494 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1495 usx = rawBits[0] & 0xffff;
1496 usy = rawBits[0] >> 16;
1497 result[0] = result[2] = usx * (1.0f / 65535.0f);
1498 result[1] = result[3] = usy * (1.0f / 65535.0f);
1499 store_vector4( inst, machine, result );
1500 }
1501 break;
1502 case OPCODE_UP4B: /* unpack four GLbytes */
1503 {
1504 GLfloat a[4], result[4];
1505 const GLuint *rawBits = (const GLuint *) a;
1506 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1507 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1508 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1509 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1510 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1511 store_vector4( inst, machine, result );
1512 }
1513 break;
1514 case OPCODE_UP4UB: /* unpack four GLubytes */
1515 {
1516 GLfloat a[4], result[4];
1517 const GLuint *rawBits = (const GLuint *) a;
1518 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1519 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1520 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1521 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1522 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1523 store_vector4( inst, machine, result );
1524 }
1525 break;
1526 case OPCODE_XPD: /* cross product */
1527 {
1528 GLfloat a[4], b[4], result[4];
1529 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1530 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1531 result[0] = a[1] * b[2] - a[2] * b[1];
1532 result[1] = a[2] * b[0] - a[0] * b[2];
1533 result[2] = a[0] * b[1] - a[1] * b[0];
1534 result[3] = 1.0;
1535 store_vector4( inst, machine, result );
1536 }
1537 break;
1538 case OPCODE_X2D: /* 2-D matrix transform */
1539 {
1540 GLfloat a[4], b[4], c[4], result[4];
1541 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1542 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1543 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1544 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1545 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1546 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1547 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1548 store_vector4( inst, machine, result );
1549 }
1550 break;
1551 case OPCODE_PRINT:
1552 {
1553 if (inst->SrcReg[0].File != -1) {
1554 GLfloat a[4];
1555 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1556 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1557 a[0], a[1], a[2], a[3]);
1558 }
1559 else {
1560 _mesa_printf("%s\n", (const char *) inst->Data);
1561 }
1562 }
1563 break;
1564 case OPCODE_END:
1565 return GL_TRUE;
1566 default:
1567 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1568 inst->Opcode);
1569 return GL_TRUE; /* return value doesn't matter */
1570
1571 }
1572 total++;
1573 if (total > MAX_EXEC) {
1574 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1575 return GL_TRUE;
1576 abort();
1577 }
1578 }
1579
1580#if FEATURE_MESA_program_debug
1581 CurrentMachine = NULL;
1582#endif
1583
1584 return GL_TRUE;
1585}