blob: 82c578f9830b8a4fa88a4ea71da842991eaca373 [file] [log] [blame]
Brian13e3b212007-02-22 16:09:40 -07001/*
2 * Mesa 3-D graphics library
3 * Version: 6.5.3
4 *
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31/*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38#include "glheader.h"
39#include "colormac.h"
40#include "context.h"
41#include "program.h"
42#include "prog_execute.h"
43#include "prog_instruction.h"
44#include "prog_parameter.h"
45#include "prog_print.h"
46#include "slang_library_noise.h"
47
48
49/* See comments below for info about this */
50#define LAMBDA_ZERO 1
51
52/* Set to 1 to printf each instruction and selected operands/results as it executes */
53#define DEBUG_PROG 0
54
55
#if FEATURE_MESA_program_debug
/* Machine being interpreted; NULL until _mesa_execute_program() sets it. */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Copy the current value (4*GLfloat) of a program register into \p val.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (used only for error reporting)
 * \param file   register file to read: PROGRAM_INPUT, PROGRAM_OUTPUT or
 *               PROGRAM_TEMPORARY
 * \param index  register index within \p file; not range-checked here
 * \param val    receives the four components; left untouched when no
 *               machine has been recorded or \p file is unsupported
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (CurrentMachine) {
      switch (file) {
      case PROGRAM_INPUT:
         /* vertex programs read per-vertex attribs, fragment programs read
          * the attribs of the element currently being processed
          */
         if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
            COPY_4V(val, CurrentMachine->VertAttribs[index]);
         }
         else {
            COPY_4V(val,
                    CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
         }
         break;
      case PROGRAM_OUTPUT:
         COPY_4V(val, CurrentMachine->Outputs[index]);
         break;
      case PROGRAM_TEMPORARY:
         COPY_4V(val, CurrentMachine->Temporaries[index]);
         break;
      default:
         /* report against the real function name and the caller's context */
         _mesa_problem(ctx,
                       "bad register file in _mesa_get_program_register");
      }
   }
}
#endif /* FEATURE_MESA_program_debug */
92
93
94
95/**
96 * Return a pointer to the 4-element float vector specified by the given
97 * source register.
98 */
99static INLINE const GLfloat *
Briane80d9012007-02-23 16:53:24 -0700100get_register_pointer(GLcontext * ctx,
101 const struct prog_src_register *source,
102 const struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -0700103{
104 /* XXX relative addressing... */
105 switch (source->File) {
106 case PROGRAM_TEMPORARY:
107 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
108 return machine->Temporaries[source->Index];
109
110 case PROGRAM_INPUT:
111 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
112 ASSERT(source->Index < VERT_ATTRIB_MAX);
113 return machine->VertAttribs[source->Index];
114 }
115 else {
116 ASSERT(source->Index < FRAG_ATTRIB_MAX);
117 return machine->Attribs[source->Index][machine->CurElement];
118 }
119
120 case PROGRAM_OUTPUT:
121 /* This is only for PRINT */
122 ASSERT(source->Index < FRAG_RESULT_MAX);
123 return machine->Outputs[source->Index];
124
125 case PROGRAM_LOCAL_PARAM:
126 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
127 return machine->CurProgram->LocalParams[source->Index];
128
129 case PROGRAM_ENV_PARAM:
130 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
131 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
132 return ctx->VertexProgram.Parameters[source->Index];
133 else
134 return ctx->FragmentProgram.Parameters[source->Index];
135
136 case PROGRAM_STATE_VAR:
137 /* Fallthrough */
138 case PROGRAM_CONSTANT:
139 /* Fallthrough */
140 case PROGRAM_UNIFORM:
141 /* Fallthrough */
142 case PROGRAM_NAMED_PARAM:
143 ASSERT(source->Index <
144 (GLint) machine->CurProgram->Parameters->NumParameters);
145 return machine->CurProgram->Parameters->ParameterValues[source->Index];
146
147 default:
148 _mesa_problem(ctx,
149 "Invalid input register file %d in get_register_pointer()",
150 source->File);
151 return NULL;
152 }
153}
154
155
156/**
157 * Fetch a 4-element float vector from the given source register.
158 * Apply swizzling and negating as needed.
159 */
160static void
Briane80d9012007-02-23 16:53:24 -0700161fetch_vector4(GLcontext * ctx,
162 const struct prog_src_register *source,
163 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700164{
165 const GLfloat *src = get_register_pointer(ctx, source, machine);
166 ASSERT(src);
167
168 if (source->Swizzle == SWIZZLE_NOOP) {
169 /* no swizzling */
170 COPY_4V(result, src);
171 }
172 else {
173 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
174 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
175 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
176 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
177 result[0] = src[GET_SWZ(source->Swizzle, 0)];
178 result[1] = src[GET_SWZ(source->Swizzle, 1)];
179 result[2] = src[GET_SWZ(source->Swizzle, 2)];
180 result[3] = src[GET_SWZ(source->Swizzle, 3)];
181 }
182
183 if (source->NegateBase) {
184 result[0] = -result[0];
185 result[1] = -result[1];
186 result[2] = -result[2];
187 result[3] = -result[3];
188 }
189 if (source->Abs) {
190 result[0] = FABSF(result[0]);
191 result[1] = FABSF(result[1]);
192 result[2] = FABSF(result[2]);
193 result[3] = FABSF(result[3]);
194 }
195 if (source->NegateAbs) {
196 result[0] = -result[0];
197 result[1] = -result[1];
198 result[2] = -result[2];
199 result[3] = -result[3];
200 }
201}
202
#if 0
/**
 * Fetch the derivative with respect to X or Y for the given fragment input
 * register, computed from the span's per-attribute step values, then apply
 * the source register's swizzle, NegateBase, Abs and NegateAbs modifiers.
 *
 * \param xOrY    'X' or 'Y': direction of the derivative
 * \param column  pixel column within the span (used for the X texcoord case)
 * \return GL_TRUE if it was easily computed or GL_FALSE if we
 * need to execute another instance of the program (ugh)!
 */
static GLboolean
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const SWspan * span,
                    char xOrY, GLint column, GLfloat result[4])
{
   const GLboolean wrtX = (xOrY == 'X');
   GLfloat src[4];
   GLuint i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   switch (source->Index) {
   case FRAG_ATTRIB_WPOS:
      {
         const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_WPOS]
                                    : span->attrStepY[FRAG_ATTRIB_WPOS];
         /* window x/y change by one pixel along their own axis only */
         src[0] = wrtX ? 1.0 : 0.0;
         src[1] = wrtX ? 0.0 : 1.0;
         src[2] = step[2] / ctx->DrawBuffer->_DepthMaxF;
         src[3] = step[3];
      }
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      {
         const GLfloat *step = wrtX ? span->attrStepX[source->Index]
                                    : span->attrStepY[source->Index];
         for (i = 0; i < 4; i++) {
            src[i] = step[i] * (1.0F / CHAN_MAXF);
         }
      }
      break;
   case FRAG_ATTRIB_FOGC:
      {
         const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_FOGC]
                                    : span->attrStepY[FRAG_ATTRIB_FOGC];
         /* only the first component carries a fog step */
         src[0] = step[0] * (1.0F / CHAN_MAXF);
         src[1] = 0.0;
         src[2] = 0.0;
         src[3] = 0.0;
      }
      break;
   default:
      assert(source->Index < FRAG_ATTRIB_MAX);
      /* texcoord or varying */
      {
         const GLfloat *step = wrtX ? span->attrStepX[source->Index]
                                    : span->attrStepY[source->Index];
         /* this is a little tricky - I think I've got it right.
          * Note the X form scales the step by the column, the Y form
          * does not.
          */
         const GLfloat invQ = wrtX
            ? 1.0f / (span->attrStart[source->Index][3] + step[3] * column)
            : 1.0f / (span->attrStart[source->Index][3] + step[3]);
         for (i = 0; i < 4; i++) {
            src[i] = step[i] * invQ;
         }
      }
      break;
   }

   for (i = 0; i < 4; i++) {
      result[i] = src[GET_SWZ(source->Swizzle, i)];
   }

   if (source->NegateBase) {
      for (i = 0; i < 4; i++) {
         result[i] = -result[i];
      }
   }
   if (source->Abs) {
      for (i = 0; i < 4; i++) {
         result[i] = FABSF(result[i]);
      }
   }
   if (source->NegateAbs) {
      for (i = 0; i < 4; i++) {
         result[i] = -result[i];
      }
   }
   return GL_TRUE;
}
#endif
317
318
319/**
320 * As above, but only return result[0] element.
321 */
322static void
Briane80d9012007-02-23 16:53:24 -0700323fetch_vector1(GLcontext * ctx,
324 const struct prog_src_register *source,
325 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700326{
327 const GLfloat *src = get_register_pointer(ctx, source, machine);
328 ASSERT(src);
329
330 result[0] = src[GET_SWZ(source->Swizzle, 0)];
331
332 if (source->NegateBase) {
333 result[0] = -result[0];
334 }
335 if (source->Abs) {
336 result[0] = FABSF(result[0]);
337 }
338 if (source->NegateAbs) {
339 result[0] = -result[0];
340 }
341}
342
343
344/**
345 * Test value against zero and return GT, LT, EQ or UN if NaN.
346 */
347static INLINE GLuint
Briane80d9012007-02-23 16:53:24 -0700348generate_cc(float value)
Brian13e3b212007-02-22 16:09:40 -0700349{
350 if (value != value)
Briane80d9012007-02-23 16:53:24 -0700351 return COND_UN; /* NaN */
Brian13e3b212007-02-22 16:09:40 -0700352 if (value > 0.0F)
353 return COND_GT;
354 if (value < 0.0F)
355 return COND_LT;
356 return COND_EQ;
357}
358
359
360/**
361 * Test if the ccMaskRule is satisfied by the given condition code.
362 * Used to mask destination writes according to the current condition code.
363 */
364static INLINE GLboolean
365test_cc(GLuint condCode, GLuint ccMaskRule)
366{
367 switch (ccMaskRule) {
368 case COND_EQ: return (condCode == COND_EQ);
369 case COND_NE: return (condCode != COND_EQ);
370 case COND_LT: return (condCode == COND_LT);
371 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
372 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
373 case COND_GT: return (condCode == COND_GT);
374 case COND_TR: return GL_TRUE;
375 case COND_FL: return GL_FALSE;
376 default: return GL_TRUE;
377 }
378}
379
380
381/**
382 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
383 * or GL_FALSE to indicate result.
384 */
385static INLINE GLboolean
386eval_condition(const struct gl_program_machine *machine,
387 const struct prog_instruction *inst)
388{
389 const GLuint swizzle = inst->DstReg.CondSwizzle;
390 const GLuint condMask = inst->DstReg.CondMask;
391 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
392 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
393 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
394 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
395 return GL_TRUE;
396 }
397 else {
398 return GL_FALSE;
399 }
400}
401
402
403
404/**
405 * Store 4 floats into a register. Observe the instructions saturate and
406 * set-condition-code flags.
407 */
408static void
Briane80d9012007-02-23 16:53:24 -0700409store_vector4(const struct prog_instruction *inst,
410 struct gl_program_machine *machine, const GLfloat value[4])
Brian13e3b212007-02-22 16:09:40 -0700411{
412 const struct prog_dst_register *dest = &(inst->DstReg);
413 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
414 GLfloat *dstReg;
415 GLfloat dummyReg[4];
416 GLfloat clampedValue[4];
417 GLuint writeMask = dest->WriteMask;
418
419 switch (dest->File) {
Briane80d9012007-02-23 16:53:24 -0700420 case PROGRAM_OUTPUT:
421 dstReg = machine->Outputs[dest->Index];
422 break;
423 case PROGRAM_TEMPORARY:
424 dstReg = machine->Temporaries[dest->Index];
425 break;
426 case PROGRAM_WRITE_ONLY:
427 dstReg = dummyReg;
428 return;
429 default:
430 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
431 return;
Brian13e3b212007-02-22 16:09:40 -0700432 }
433
434#if 0
435 if (value[0] > 1.0e10 ||
436 IS_INF_OR_NAN(value[0]) ||
437 IS_INF_OR_NAN(value[1]) ||
Briane80d9012007-02-23 16:53:24 -0700438 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
Brian13e3b212007-02-22 16:09:40 -0700439 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
440#endif
441
442 if (clamp) {
443 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
444 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
445 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
446 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
447 value = clampedValue;
448 }
449
450 if (dest->CondMask != COND_TR) {
451 /* condition codes may turn off some writes */
452 if (writeMask & WRITEMASK_X) {
453 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
454 dest->CondMask))
455 writeMask &= ~WRITEMASK_X;
456 }
457 if (writeMask & WRITEMASK_Y) {
458 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
459 dest->CondMask))
460 writeMask &= ~WRITEMASK_Y;
461 }
462 if (writeMask & WRITEMASK_Z) {
463 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
464 dest->CondMask))
465 writeMask &= ~WRITEMASK_Z;
466 }
467 if (writeMask & WRITEMASK_W) {
468 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
469 dest->CondMask))
470 writeMask &= ~WRITEMASK_W;
471 }
472 }
473
474 if (writeMask & WRITEMASK_X)
475 dstReg[0] = value[0];
476 if (writeMask & WRITEMASK_Y)
477 dstReg[1] = value[1];
478 if (writeMask & WRITEMASK_Z)
479 dstReg[2] = value[2];
480 if (writeMask & WRITEMASK_W)
481 dstReg[3] = value[3];
482
483 if (inst->CondUpdate) {
484 if (writeMask & WRITEMASK_X)
485 machine->CondCodes[0] = generate_cc(value[0]);
486 if (writeMask & WRITEMASK_Y)
487 machine->CondCodes[1] = generate_cc(value[1]);
488 if (writeMask & WRITEMASK_Z)
489 machine->CondCodes[2] = generate_cc(value[2]);
490 if (writeMask & WRITEMASK_W)
491 machine->CondCodes[3] = generate_cc(value[3]);
492 }
493}
494
495
#if 0
/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * \param ctx       rendering context (not used here)
 * \param machine   existing machine state; copied, never written
 * \param program   fragment program; its InputsRead selects which inputs
 *                  get a derivative step added
 * \param span      span supplying the per-attribute X/Y steps
 * \param xOrY      'X' or 'Y': direction of the step
 * \param dMachine  receives the copy with stepped inputs and reset
 *                  condition codes
 */
static void
init_machine_deriv(GLcontext * ctx,
                   const struct gl_program_machine *machine,
                   const struct gl_fragment_program *program,
                   const SWspan * span, char xOrY,
                   struct gl_program_machine *dMachine)
{
   const GLboolean wrtX = (xOrY == 'X');
   GLuint attr, i;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));

   /* Everything below must modify the copy, not the const original.
    * NOTE(review): if Attribs is a pointer member the copy above is shallow
    * and the input increments still alter storage the original machine
    * reads - confirm against struct gl_program_machine.
    */

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* XXX also need to do this when using valgrind */
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero((void *) dMachine->Temporaries,
                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives */
   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
      GLfloat *wpos = dMachine->Attribs[FRAG_ATTRIB_WPOS][dMachine->CurElement];
      const GLfloat *step = wrtX ? span->attrStepX[FRAG_ATTRIB_WPOS]
                                 : span->attrStepY[FRAG_ATTRIB_WPOS];
      /* window x/y move by one pixel along their own axis only */
      wpos[0] += wrtX ? 1.0F : 0.0F;
      wpos[1] += wrtX ? 0.0F : 1.0F;
      wpos[2] += step[2];
      wpos[3] += step[3];
   }

   /* primary, secondary colors */
   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *col = dMachine->Attribs[attr][dMachine->CurElement];
         const GLfloat *step = wrtX ? span->attrStepX[attr]
                                    : span->attrStepY[attr];
         for (i = 0; i < 4; i++) {
            col[i] += step[i] * (1.0F / CHAN_MAXF);
         }
      }
   }

   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
      GLfloat *fogc = dMachine->Attribs[FRAG_ATTRIB_FOGC][dMachine->CurElement];
      if (wrtX) {
         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
      }
      else {
         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
      }
   }

   /* texcoord and varying vars */
   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
      if (program->Base.InputsRead & (1 << attr)) {
         GLfloat *val = dMachine->Attribs[attr][dMachine->CurElement];
         const GLfloat *step = wrtX ? span->attrStepX[attr]
                                    : span->attrStepY[attr];
         /* XXX perspective-correct interpolation */
         for (i = 0; i < 4; i++) {
            val[i] += step[i];
         }
      }
   }

   /* init condition codes */
   dMachine->CondCodes[0] = COND_EQ;
   dMachine->CondCodes[1] = COND_EQ;
   dMachine->CondCodes[2] = COND_EQ;
   dMachine->CondCodes[3] = COND_EQ;
}
#endif
594
595
596/**
597 * Execute the given vertex/fragment program.
598 *
599 * \param ctx - rendering context
600 * \param program - the vertex or fragment program to execute
601 * \param machine - machine state (register file)
602 * \param maxInst - max number of instructions to execute
603 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
604 */
605GLboolean
Briane80d9012007-02-23 16:53:24 -0700606_mesa_execute_program(GLcontext * ctx,
Brian13e3b212007-02-22 16:09:40 -0700607 const struct gl_program *program, GLuint maxInst,
608 struct gl_program_machine *machine, GLuint element)
609{
610 const GLuint MAX_EXEC = 10000;
611 GLint pc, total = 0;
612
613 machine->CurProgram = program;
614
615 if (DEBUG_PROG) {
616 printf("execute program %u --------------------\n", program->Id);
617 }
618
619#if FEATURE_MESA_program_debug
620 CurrentMachine = machine;
621#endif
622
623 for (pc = 0; pc < maxInst; pc++) {
624 const struct prog_instruction *inst = program->Instructions + pc;
625
626#if FEATURE_MESA_program_debug
627 if (ctx->FragmentProgram.CallbackEnabled &&
628 ctx->FragmentProgram.Callback) {
629 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
630 ctx->FragmentProgram.Callback(program->Target,
631 ctx->FragmentProgram.CallbackData);
632 }
633#endif
634
635 if (DEBUG_PROG) {
636 _mesa_print_instruction(inst);
637 }
638
639 switch (inst->Opcode) {
Briane80d9012007-02-23 16:53:24 -0700640 case OPCODE_ABS:
641 {
642 GLfloat a[4], result[4];
643 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
644 result[0] = FABSF(a[0]);
645 result[1] = FABSF(a[1]);
646 result[2] = FABSF(a[2]);
647 result[3] = FABSF(a[3]);
648 store_vector4(inst, machine, result);
649 }
650 break;
651 case OPCODE_ADD:
652 {
653 GLfloat a[4], b[4], result[4];
654 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
655 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
656 result[0] = a[0] + b[0];
657 result[1] = a[1] + b[1];
658 result[2] = a[2] + b[2];
659 result[3] = a[3] + b[3];
660 store_vector4(inst, machine, result);
661 if (DEBUG_PROG) {
662 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
663 result[0], result[1], result[2], result[3],
664 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700665 }
Briane80d9012007-02-23 16:53:24 -0700666 }
667 break;
668 case OPCODE_BGNLOOP:
669 /* no-op */
670 break;
671 case OPCODE_ENDLOOP:
672 /* subtract 1 here since pc is incremented by for(pc) loop */
673 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
674 break;
675 case OPCODE_BGNSUB: /* begin subroutine */
676 break;
677 case OPCODE_ENDSUB: /* end subroutine */
678 break;
679 case OPCODE_BRA: /* branch (conditional) */
680 /* fall-through */
681 case OPCODE_BRK: /* break out of loop (conditional) */
682 /* fall-through */
683 case OPCODE_CONT: /* continue loop (conditional) */
684 if (eval_condition(machine, inst)) {
685 /* take branch */
686 /* Subtract 1 here since we'll do pc++ at end of for-loop */
687 pc = inst->BranchTarget - 1;
688 }
689 break;
690 case OPCODE_CAL: /* Call subroutine (conditional) */
691 if (eval_condition(machine, inst)) {
692 /* call the subroutine */
693 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
694 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -0700695 }
Briane80d9012007-02-23 16:53:24 -0700696 machine->CallStack[machine->StackDepth++] = pc + 1;
697 pc = inst->BranchTarget; /* XXX - 1 ??? */
698 }
699 break;
700 case OPCODE_CMP:
701 {
702 GLfloat a[4], b[4], c[4], result[4];
703 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
704 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
705 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
706 result[0] = a[0] < 0.0F ? b[0] : c[0];
707 result[1] = a[1] < 0.0F ? b[1] : c[1];
708 result[2] = a[2] < 0.0F ? b[2] : c[2];
709 result[3] = a[3] < 0.0F ? b[3] : c[3];
710 store_vector4(inst, machine, result);
711 }
712 break;
713 case OPCODE_COS:
714 {
715 GLfloat a[4], result[4];
716 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
717 result[0] = result[1] = result[2] = result[3]
718 = (GLfloat) _mesa_cos(a[0]);
719 store_vector4(inst, machine, result);
720 }
721 break;
722 case OPCODE_DDX: /* Partial derivative with respect to X */
723 {
Brian13e3b212007-02-22 16:09:40 -0700724#if 0
Briane80d9012007-02-23 16:53:24 -0700725 GLfloat a[4], aNext[4], result[4];
726 struct gl_program_machine dMachine;
727 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
728 column, result)) {
729 /* This is tricky. Make a copy of the current machine state,
730 * increment the input registers by the dx or dy partial
731 * derivatives, then re-execute the program up to the
732 * preceeding instruction, then fetch the source register.
733 * Finally, find the difference in the register values for
734 * the original and derivative runs.
735 */
736 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
737 init_machine_deriv(ctx, machine, program, span,
738 'X', &dMachine);
739 execute_program(ctx, program, pc, &dMachine, span, column);
740 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
741 aNext);
742 result[0] = aNext[0] - a[0];
743 result[1] = aNext[1] - a[1];
744 result[2] = aNext[2] - a[2];
745 result[3] = aNext[3] - a[3];
Brian13e3b212007-02-22 16:09:40 -0700746 }
Briane80d9012007-02-23 16:53:24 -0700747 store_vector4(inst, machine, result);
748#else
749 static const GLfloat result[4] = { 0, 0, 0, 0 };
750 store_vector4(inst, machine, result);
751#endif
752 }
753 break;
754 case OPCODE_DDY: /* Partial derivative with respect to Y */
755 {
Brian13e3b212007-02-22 16:09:40 -0700756#if 0
Briane80d9012007-02-23 16:53:24 -0700757 GLfloat a[4], aNext[4], result[4];
758 struct gl_program_machine dMachine;
759 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
760 column, result)) {
761 init_machine_deriv(ctx, machine, program, span,
762 'Y', &dMachine);
763 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, a);
764 execute_program(ctx, program, pc, &dMachine, span, column);
765 fetch_vector4(ctx, &inst->SrcReg[0], &dMachine, program,
766 aNext);
767 result[0] = aNext[0] - a[0];
768 result[1] = aNext[1] - a[1];
769 result[2] = aNext[2] - a[2];
770 result[3] = aNext[3] - a[3];
771 }
772 store_vector4(inst, machine, result);
Brian13e3b212007-02-22 16:09:40 -0700773#else
Briane80d9012007-02-23 16:53:24 -0700774 static const GLfloat result[4] = { 0, 0, 0, 0 };
775 store_vector4(inst, machine, result);
Brian13e3b212007-02-22 16:09:40 -0700776#endif
Briane80d9012007-02-23 16:53:24 -0700777 }
778 break;
779 case OPCODE_DP3:
780 {
781 GLfloat a[4], b[4], result[4];
782 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
783 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
784 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
785 store_vector4(inst, machine, result);
786 if (DEBUG_PROG) {
787 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
788 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian13e3b212007-02-22 16:09:40 -0700789 }
Briane80d9012007-02-23 16:53:24 -0700790 }
791 break;
792 case OPCODE_DP4:
793 {
794 GLfloat a[4], b[4], result[4];
795 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
796 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
797 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
798 store_vector4(inst, machine, result);
799 if (DEBUG_PROG) {
800 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
801 result[0], a[0], a[1], a[2], a[3],
802 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700803 }
Briane80d9012007-02-23 16:53:24 -0700804 }
805 break;
806 case OPCODE_DPH:
807 {
808 GLfloat a[4], b[4], result[4];
809 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
810 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
811 result[0] = result[1] = result[2] = result[3] =
812 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
813 store_vector4(inst, machine, result);
814 }
815 break;
816 case OPCODE_DST: /* Distance vector */
817 {
818 GLfloat a[4], b[4], result[4];
819 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
820 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
821 result[0] = 1.0F;
822 result[1] = a[1] * b[1];
823 result[2] = a[2];
824 result[3] = b[3];
825 store_vector4(inst, machine, result);
826 }
827 break;
828 case OPCODE_EX2: /* Exponential base 2 */
829 {
830 GLfloat a[4], result[4];
831 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
832 result[0] = result[1] = result[2] = result[3] =
833 (GLfloat) _mesa_pow(2.0, a[0]);
834 store_vector4(inst, machine, result);
835 }
836 break;
837 case OPCODE_FLR:
838 {
839 GLfloat a[4], result[4];
840 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
841 result[0] = FLOORF(a[0]);
842 result[1] = FLOORF(a[1]);
843 result[2] = FLOORF(a[2]);
844 result[3] = FLOORF(a[3]);
845 store_vector4(inst, machine, result);
846 }
847 break;
848 case OPCODE_FRC:
849 {
850 GLfloat a[4], result[4];
851 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
852 result[0] = a[0] - FLOORF(a[0]);
853 result[1] = a[1] - FLOORF(a[1]);
854 result[2] = a[2] - FLOORF(a[2]);
855 result[3] = a[3] - FLOORF(a[3]);
856 store_vector4(inst, machine, result);
857 }
858 break;
859 case OPCODE_IF:
860 if (eval_condition(machine, inst)) {
861 /* do if-clause (just continue execution) */
862 }
863 else {
864 /* go to the instruction after ELSE or ENDIF */
Brian13e3b212007-02-22 16:09:40 -0700865 assert(inst->BranchTarget >= 0);
866 pc = inst->BranchTarget - 1;
Briane80d9012007-02-23 16:53:24 -0700867 }
868 break;
869 case OPCODE_ELSE:
870 /* goto ENDIF */
871 assert(inst->BranchTarget >= 0);
872 pc = inst->BranchTarget - 1;
873 break;
874 case OPCODE_ENDIF:
875 /* nothing */
876 break;
877 case OPCODE_INT: /* float to int */
878 {
879 GLfloat a[4], result[4];
880 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
881 result[0] = (GLfloat) (GLint) a[0];
882 result[1] = (GLfloat) (GLint) a[1];
883 result[2] = (GLfloat) (GLint) a[2];
884 result[3] = (GLfloat) (GLint) a[3];
885 store_vector4(inst, machine, result);
886 }
887 break;
888 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
889 if (eval_condition(machine, inst)) {
890 return GL_FALSE;
891 }
892 break;
893 case OPCODE_KIL: /* ARB_f_p only */
894 {
895 GLfloat a[4];
896 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
897 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
Brian13e3b212007-02-22 16:09:40 -0700898 return GL_FALSE;
899 }
Briane80d9012007-02-23 16:53:24 -0700900 }
901 break;
902 case OPCODE_LG2: /* log base 2 */
903 {
904 GLfloat a[4], result[4];
905 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
906 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
907 store_vector4(inst, machine, result);
908 }
909 break;
910 case OPCODE_LIT:
911 {
912 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
913 GLfloat a[4], result[4];
914 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
915 a[0] = MAX2(a[0], 0.0F);
916 a[1] = MAX2(a[1], 0.0F);
917 /* XXX ARB version clamps a[3], NV version doesn't */
918 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
919 result[0] = 1.0F;
920 result[1] = a[0];
921 /* XXX we could probably just use pow() here */
922 if (a[0] > 0.0F) {
923 if (a[1] == 0.0 && a[3] == 0.0)
924 result[2] = 1.0;
925 else
926 result[2] = EXPF(a[3] * LOGF(a[1]));
Brian13e3b212007-02-22 16:09:40 -0700927 }
Briane80d9012007-02-23 16:53:24 -0700928 else {
929 result[2] = 0.0;
Brian13e3b212007-02-22 16:09:40 -0700930 }
Briane80d9012007-02-23 16:53:24 -0700931 result[3] = 1.0F;
932 store_vector4(inst, machine, result);
933 if (DEBUG_PROG) {
934 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
935 result[0], result[1], result[2], result[3],
936 a[0], a[1], a[2], a[3]);
Brian13e3b212007-02-22 16:09:40 -0700937 }
Briane80d9012007-02-23 16:53:24 -0700938 }
939 break;
940 case OPCODE_LRP:
941 {
942 GLfloat a[4], b[4], c[4], result[4];
943 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
944 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
945 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
946 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
947 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
948 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
949 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
950 store_vector4(inst, machine, result);
951 if (DEBUG_PROG) {
952 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
953 "(%g %g %g %g), (%g %g %g %g)\n",
954 result[0], result[1], result[2], result[3],
955 a[0], a[1], a[2], a[3],
956 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -0700957 }
Briane80d9012007-02-23 16:53:24 -0700958 }
959 break;
960 case OPCODE_MAD:
961 {
962 GLfloat a[4], b[4], c[4], result[4];
963 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
964 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
965 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
966 result[0] = a[0] * b[0] + c[0];
967 result[1] = a[1] * b[1] + c[1];
968 result[2] = a[2] * b[2] + c[2];
969 result[3] = a[3] * b[3] + c[3];
970 store_vector4(inst, machine, result);
971 if (DEBUG_PROG) {
972 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
973 "(%g %g %g %g) + (%g %g %g %g)\n",
974 result[0], result[1], result[2], result[3],
975 a[0], a[1], a[2], a[3],
976 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -0700977 }
Briane80d9012007-02-23 16:53:24 -0700978 }
979 break;
980 case OPCODE_MAX:
981 {
982 GLfloat a[4], b[4], result[4];
983 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
984 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
985 result[0] = MAX2(a[0], b[0]);
986 result[1] = MAX2(a[1], b[1]);
987 result[2] = MAX2(a[2], b[2]);
988 result[3] = MAX2(a[3], b[3]);
989 store_vector4(inst, machine, result);
990 if (DEBUG_PROG) {
991 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
992 result[0], result[1], result[2], result[3],
993 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700994 }
Briane80d9012007-02-23 16:53:24 -0700995 }
996 break;
997 case OPCODE_MIN:
998 {
999 GLfloat a[4], b[4], result[4];
1000 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1001 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1002 result[0] = MIN2(a[0], b[0]);
1003 result[1] = MIN2(a[1], b[1]);
1004 result[2] = MIN2(a[2], b[2]);
1005 result[3] = MIN2(a[3], b[3]);
1006 store_vector4(inst, machine, result);
1007 }
1008 break;
1009 case OPCODE_MOV:
1010 {
1011 GLfloat result[4];
1012 fetch_vector4(ctx, &inst->SrcReg[0], machine, result);
1013 store_vector4(inst, machine, result);
1014 if (DEBUG_PROG) {
1015 printf("MOV (%g %g %g %g)\n",
1016 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001017 }
Briane80d9012007-02-23 16:53:24 -07001018 }
1019 break;
1020 case OPCODE_MUL:
1021 {
1022 GLfloat a[4], b[4], result[4];
1023 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1024 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1025 result[0] = a[0] * b[0];
1026 result[1] = a[1] * b[1];
1027 result[2] = a[2] * b[2];
1028 result[3] = a[3] * b[3];
1029 store_vector4(inst, machine, result);
1030 if (DEBUG_PROG) {
1031 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1032 result[0], result[1], result[2], result[3],
1033 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001034 }
Briane80d9012007-02-23 16:53:24 -07001035 }
1036 break;
1037 case OPCODE_NOISE1:
1038 {
1039 GLfloat a[4], result[4];
1040 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1041 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001042 result[1] =
Briane80d9012007-02-23 16:53:24 -07001043 result[2] = result[3] = _slang_library_noise1(a[0]);
1044 store_vector4(inst, machine, result);
1045 }
1046 break;
1047 case OPCODE_NOISE2:
1048 {
1049 GLfloat a[4], result[4];
1050 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1051 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001052 result[1] =
Briane80d9012007-02-23 16:53:24 -07001053 result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1054 store_vector4(inst, machine, result);
1055 }
1056 break;
1057 case OPCODE_NOISE3:
1058 {
1059 GLfloat a[4], result[4];
1060 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1061 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001062 result[1] =
1063 result[2] =
1064 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
Briane80d9012007-02-23 16:53:24 -07001065 store_vector4(inst, machine, result);
1066 }
1067 break;
1068 case OPCODE_NOISE4:
1069 {
1070 GLfloat a[4], result[4];
1071 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1072 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001073 result[1] =
1074 result[2] =
1075 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
Briane80d9012007-02-23 16:53:24 -07001076 store_vector4(inst, machine, result);
1077 }
1078 break;
1079 case OPCODE_NOP:
1080 break;
1081 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1082 {
1083 GLfloat a[4], result[4];
1084 GLhalfNV hx, hy;
1085 GLuint *rawResult = (GLuint *) result;
1086 GLuint twoHalves;
1087 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1088 hx = _mesa_float_to_half(a[0]);
1089 hy = _mesa_float_to_half(a[1]);
1090 twoHalves = hx | (hy << 16);
1091 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1092 = twoHalves;
1093 store_vector4(inst, machine, result);
1094 }
1095 break;
1096 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1097 {
1098 GLfloat a[4], result[4];
1099 GLuint usx, usy, *rawResult = (GLuint *) result;
1100 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1101 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1102 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1103 usx = IROUND(a[0] * 65535.0F);
1104 usy = IROUND(a[1] * 65535.0F);
1105 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1106 = usx | (usy << 16);
1107 store_vector4(inst, machine, result);
1108 }
1109 break;
1110 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1111 {
1112 GLfloat a[4], result[4];
1113 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1114 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1115 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1116 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1117 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1118 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1119 ubx = IROUND(127.0F * a[0] + 128.0F);
1120 uby = IROUND(127.0F * a[1] + 128.0F);
1121 ubz = IROUND(127.0F * a[2] + 128.0F);
1122 ubw = IROUND(127.0F * a[3] + 128.0F);
1123 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1124 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1125 store_vector4(inst, machine, result);
1126 }
1127 break;
1128 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1129 {
1130 GLfloat a[4], result[4];
1131 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1132 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1133 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1134 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1135 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1136 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1137 ubx = IROUND(255.0F * a[0]);
1138 uby = IROUND(255.0F * a[1]);
1139 ubz = IROUND(255.0F * a[2]);
1140 ubw = IROUND(255.0F * a[3]);
1141 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1142 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1143 store_vector4(inst, machine, result);
1144 }
1145 break;
1146 case OPCODE_POW:
1147 {
1148 GLfloat a[4], b[4], result[4];
1149 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1150 fetch_vector1(ctx, &inst->SrcReg[1], machine, b);
1151 result[0] = result[1] = result[2] = result[3]
1152 = (GLfloat) _mesa_pow(a[0], b[0]);
1153 store_vector4(inst, machine, result);
1154 }
1155 break;
1156 case OPCODE_RCP:
1157 {
1158 GLfloat a[4], result[4];
1159 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1160 if (DEBUG_PROG) {
1161 if (a[0] == 0)
1162 printf("RCP(0)\n");
1163 else if (IS_INF_OR_NAN(a[0]))
1164 printf("RCP(inf)\n");
Brian13e3b212007-02-22 16:09:40 -07001165 }
Briane80d9012007-02-23 16:53:24 -07001166 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1167 store_vector4(inst, machine, result);
1168 }
1169 break;
1170 case OPCODE_RET: /* return from subroutine (conditional) */
1171 if (eval_condition(machine, inst)) {
1172 if (machine->StackDepth == 0) {
1173 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -07001174 }
Briane80d9012007-02-23 16:53:24 -07001175 pc = machine->CallStack[--machine->StackDepth];
1176 }
1177 break;
1178 case OPCODE_RFL: /* reflection vector */
1179 {
1180 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1181 fetch_vector4(ctx, &inst->SrcReg[0], machine, axis);
1182 fetch_vector4(ctx, &inst->SrcReg[1], machine, dir);
1183 tmpW = DOT3(axis, axis);
1184 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1185 result[0] = tmpX * axis[0] - dir[0];
1186 result[1] = tmpX * axis[1] - dir[1];
1187 result[2] = tmpX * axis[2] - dir[2];
1188 /* result[3] is never written! XXX enforce in parser! */
1189 store_vector4(inst, machine, result);
1190 }
1191 break;
1192 case OPCODE_RSQ: /* 1 / sqrt() */
1193 {
1194 GLfloat a[4], result[4];
1195 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1196 a[0] = FABSF(a[0]);
1197 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1198 store_vector4(inst, machine, result);
1199 if (DEBUG_PROG) {
1200 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
Brian13e3b212007-02-22 16:09:40 -07001201 }
Briane80d9012007-02-23 16:53:24 -07001202 }
1203 break;
1204 case OPCODE_SCS: /* sine and cos */
1205 {
1206 GLfloat a[4], result[4];
1207 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1208 result[0] = (GLfloat) _mesa_cos(a[0]);
1209 result[1] = (GLfloat) _mesa_sin(a[0]);
1210 result[2] = 0.0; /* undefined! */
1211 result[3] = 0.0; /* undefined! */
1212 store_vector4(inst, machine, result);
1213 }
1214 break;
1215 case OPCODE_SEQ: /* set on equal */
1216 {
1217 GLfloat a[4], b[4], result[4];
1218 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1219 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1220 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1221 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1222 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1223 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1224 store_vector4(inst, machine, result);
1225 }
1226 break;
1227 case OPCODE_SFL: /* set false, operands ignored */
1228 {
1229 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1230 store_vector4(inst, machine, result);
1231 }
1232 break;
1233 case OPCODE_SGE: /* set on greater or equal */
1234 {
1235 GLfloat a[4], b[4], result[4];
1236 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1237 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1238 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1239 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1240 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1241 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1242 store_vector4(inst, machine, result);
1243 }
1244 break;
1245 case OPCODE_SGT: /* set on greater */
1246 {
1247 GLfloat a[4], b[4], result[4];
1248 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1249 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1250 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1251 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1252 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1253 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1254 store_vector4(inst, machine, result);
1255 if (DEBUG_PROG) {
1256 printf("SGT %g %g %g %g\n",
1257 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001258 }
Briane80d9012007-02-23 16:53:24 -07001259 }
1260 break;
1261 case OPCODE_SIN:
1262 {
1263 GLfloat a[4], result[4];
1264 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1265 result[0] = result[1] = result[2] = result[3]
1266 = (GLfloat) _mesa_sin(a[0]);
1267 store_vector4(inst, machine, result);
1268 }
1269 break;
1270 case OPCODE_SLE: /* set on less or equal */
1271 {
1272 GLfloat a[4], b[4], result[4];
1273 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1274 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1275 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1276 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1277 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1278 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1279 store_vector4(inst, machine, result);
1280 }
1281 break;
1282 case OPCODE_SLT: /* set on less */
1283 {
1284 GLfloat a[4], b[4], result[4];
1285 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1286 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1287 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1288 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1289 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1290 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1291 store_vector4(inst, machine, result);
1292 }
1293 break;
1294 case OPCODE_SNE: /* set on not equal */
1295 {
1296 GLfloat a[4], b[4], result[4];
1297 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1298 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1299 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1300 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1301 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1302 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1303 store_vector4(inst, machine, result);
1304 }
1305 break;
1306 case OPCODE_STR: /* set true, operands ignored */
1307 {
1308 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1309 store_vector4(inst, machine, result);
1310 }
1311 break;
1312 case OPCODE_SUB:
1313 {
1314 GLfloat a[4], b[4], result[4];
1315 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1316 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1317 result[0] = a[0] - b[0];
1318 result[1] = a[1] - b[1];
1319 result[2] = a[2] - b[2];
1320 result[3] = a[3] - b[3];
1321 store_vector4(inst, machine, result);
1322 if (DEBUG_PROG) {
1323 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1324 result[0], result[1], result[2], result[3],
1325 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001326 }
Briane80d9012007-02-23 16:53:24 -07001327 }
1328 break;
1329 case OPCODE_SWZ: /* extended swizzle */
1330 {
1331 const struct prog_src_register *source = &inst->SrcReg[0];
1332 const GLfloat *src = get_register_pointer(ctx, source, machine);
1333 GLfloat result[4];
1334 GLuint i;
1335 for (i = 0; i < 4; i++) {
1336 const GLuint swz = GET_SWZ(source->Swizzle, i);
1337 if (swz == SWIZZLE_ZERO)
1338 result[i] = 0.0;
1339 else if (swz == SWIZZLE_ONE)
1340 result[i] = 1.0;
Brian13e3b212007-02-22 16:09:40 -07001341 else {
Briane80d9012007-02-23 16:53:24 -07001342 ASSERT(swz >= 0);
1343 ASSERT(swz <= 3);
1344 result[i] = src[swz];
Brian13e3b212007-02-22 16:09:40 -07001345 }
Briane80d9012007-02-23 16:53:24 -07001346 if (source->NegateBase & (1 << i))
1347 result[i] = -result[i];
Brian13e3b212007-02-22 16:09:40 -07001348 }
Briane80d9012007-02-23 16:53:24 -07001349 store_vector4(inst, machine, result);
1350 }
1351 break;
1352 case OPCODE_TEX: /* Both ARB and NV frag prog */
1353 /* Texel lookup */
1354 {
1355 /* Note: only use the precomputed lambda value when we're
1356 * sampling texture unit [K] with texcoord[K].
1357 * Otherwise, the lambda value may have no relation to the
1358 * instruction's texcoord or texture image. Using the wrong
1359 * lambda is usually bad news.
1360 * The rest of the time, just use zero (until we get a more
1361 * sophisticated way of computing lambda).
1362 */
1363 GLfloat coord[4], color[4], lambda;
1364#if 0
1365 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1366 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1367 lambda = span->array->lambda[inst->TexSrcUnit][column];
1368 else
1369#endif
1370 lambda = 0.0;
1371 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1372 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
1373 color);
1374 if (DEBUG_PROG) {
1375 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1376 "lod %f\n",
1377 color[0], color[1], color[2], color[3],
1378 inst->TexSrcUnit,
1379 coord[0], coord[1], coord[2], coord[3], lambda);
1380 }
1381 store_vector4(inst, machine, color);
1382 }
1383 break;
1384 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1385 /* Texel lookup with LOD bias */
1386 {
1387 const struct gl_texture_unit *texUnit
1388 = &ctx->Texture.Unit[inst->TexSrcUnit];
1389 GLfloat coord[4], color[4], lambda, bias;
1390#if 0
1391 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1392 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1393 lambda = span->array->lambda[inst->TexSrcUnit][column];
1394 else
1395#endif
1396 lambda = 0.0;
1397 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1398 /* coord[3] is the bias to add to lambda */
1399 bias = texUnit->LodBias + coord[3];
1400 if (texUnit->_Current)
1401 bias += texUnit->_Current->LodBias;
1402 machine->FetchTexelLod(ctx, coord, lambda + bias,
1403 inst->TexSrcUnit, color);
1404 store_vector4(inst, machine, color);
1405 }
1406 break;
1407 case OPCODE_TXD: /* GL_NV_fragment_program only */
1408 /* Texture lookup w/ partial derivatives for LOD */
1409 {
1410 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1411 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1412 fetch_vector4(ctx, &inst->SrcReg[1], machine, dtdx);
1413 fetch_vector4(ctx, &inst->SrcReg[2], machine, dtdy);
1414 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1415 inst->TexSrcUnit, color);
1416 store_vector4(inst, machine, color);
1417 }
1418 break;
1419 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1420 /* Texture lookup w/ projective divide */
1421 {
1422 GLfloat texcoord[4], color[4], lambda;
1423#if 0
1424 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1425 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1426 lambda = span->array->lambda[inst->TexSrcUnit][column];
1427 else
1428#endif
1429 lambda = 0.0;
1430 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1431 /* Not so sure about this test - if texcoord[3] is
1432 * zero, we'd probably be fine except for an ASSERT in
1433 * IROUND_POS() which gets triggered by the inf values created.
1434 */
1435 if (texcoord[3] != 0.0) {
1436 texcoord[0] /= texcoord[3];
1437 texcoord[1] /= texcoord[3];
1438 texcoord[2] /= texcoord[3];
1439 }
1440 machine->FetchTexelLod(ctx, texcoord, lambda,
1441 inst->TexSrcUnit, color);
1442 store_vector4(inst, machine, color);
1443 }
1444 break;
1445 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1446 /* Texture lookup w/ projective divide */
1447 {
1448 GLfloat texcoord[4], color[4], lambda;
1449#if 0
1450 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1451 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
1452 lambda = span->array->lambda[inst->TexSrcUnit][column];
1453 else
1454#endif
1455 lambda = 0.0;
1456 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1457 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1458 texcoord[3] != 0.0) {
1459 texcoord[0] /= texcoord[3];
1460 texcoord[1] /= texcoord[3];
1461 texcoord[2] /= texcoord[3];
1462 }
1463 machine->FetchTexelLod(ctx, texcoord, lambda,
1464 inst->TexSrcUnit, color);
1465 store_vector4(inst, machine, color);
1466 }
1467 break;
1468 case OPCODE_UP2H: /* unpack two 16-bit floats */
1469 {
1470 GLfloat a[4], result[4];
1471 const GLuint *rawBits = (const GLuint *) a;
1472 GLhalfNV hx, hy;
1473 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1474 hx = rawBits[0] & 0xffff;
1475 hy = rawBits[0] >> 16;
1476 result[0] = result[2] = _mesa_half_to_float(hx);
1477 result[1] = result[3] = _mesa_half_to_float(hy);
1478 store_vector4(inst, machine, result);
1479 }
1480 break;
1481 case OPCODE_UP2US: /* unpack two GLushorts */
1482 {
1483 GLfloat a[4], result[4];
1484 const GLuint *rawBits = (const GLuint *) a;
1485 GLushort usx, usy;
1486 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1487 usx = rawBits[0] & 0xffff;
1488 usy = rawBits[0] >> 16;
1489 result[0] = result[2] = usx * (1.0f / 65535.0f);
1490 result[1] = result[3] = usy * (1.0f / 65535.0f);
1491 store_vector4(inst, machine, result);
1492 }
1493 break;
1494 case OPCODE_UP4B: /* unpack four GLbytes */
1495 {
1496 GLfloat a[4], result[4];
1497 const GLuint *rawBits = (const GLuint *) a;
1498 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1499 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1500 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1501 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1502 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1503 store_vector4(inst, machine, result);
1504 }
1505 break;
1506 case OPCODE_UP4UB: /* unpack four GLubytes */
1507 {
1508 GLfloat a[4], result[4];
1509 const GLuint *rawBits = (const GLuint *) a;
1510 fetch_vector1(ctx, &inst->SrcReg[0], machine, a);
1511 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1512 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1513 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1514 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1515 store_vector4(inst, machine, result);
1516 }
1517 break;
1518 case OPCODE_XPD: /* cross product */
1519 {
1520 GLfloat a[4], b[4], result[4];
1521 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1522 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1523 result[0] = a[1] * b[2] - a[2] * b[1];
1524 result[1] = a[2] * b[0] - a[0] * b[2];
1525 result[2] = a[0] * b[1] - a[1] * b[0];
1526 result[3] = 1.0;
1527 store_vector4(inst, machine, result);
1528 }
1529 break;
1530 case OPCODE_X2D: /* 2-D matrix transform */
1531 {
1532 GLfloat a[4], b[4], c[4], result[4];
1533 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1534 fetch_vector4(ctx, &inst->SrcReg[1], machine, b);
1535 fetch_vector4(ctx, &inst->SrcReg[2], machine, c);
1536 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1537 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1538 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1539 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1540 store_vector4(inst, machine, result);
1541 }
1542 break;
1543 case OPCODE_PRINT:
1544 {
1545 if (inst->SrcReg[0].File != -1) {
1546 GLfloat a[4];
1547 fetch_vector4(ctx, &inst->SrcReg[0], machine, a);
1548 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1549 a[0], a[1], a[2], a[3]);
1550 }
1551 else {
1552 _mesa_printf("%s\n", (const char *) inst->Data);
1553 }
1554 }
1555 break;
1556 case OPCODE_END:
1557 return GL_TRUE;
1558 default:
1559 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1560 inst->Opcode);
1561 return GL_TRUE; /* return value doesn't matter */
Brian13e3b212007-02-22 16:09:40 -07001562
1563 }
Briane80d9012007-02-23 16:53:24 -07001564
Brian13e3b212007-02-22 16:09:40 -07001565 total++;
1566 if (total > MAX_EXEC) {
1567 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1568 return GL_TRUE;
1569 abort();
1570 }
Briane80d9012007-02-23 16:53:24 -07001571
1572 } /* for pc */
Brian13e3b212007-02-22 16:09:40 -07001573
1574#if FEATURE_MESA_program_debug
1575 CurrentMachine = NULL;
1576#endif
1577
1578 return GL_TRUE;
1579}