blob: 1ef801a17dd5183bc9d5ecb72a647a7eddad98e7 [file] [log] [blame]
Brian13e3b212007-02-22 16:09:40 -07001/*
2 * Mesa 3-D graphics library
Brian Paulf4361542008-11-11 10:47:10 -07003 * Version: 7.3
Brian13e3b212007-02-22 16:09:40 -07004 *
Brian Paulf4361542008-11-11 10:47:10 -07005 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
Brian13e3b212007-02-22 16:09:40 -07006 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31/*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
Brian Paulbbd28712008-09-18 12:26:54 -060038#include "main/glheader.h"
39#include "main/colormac.h"
40#include "main/context.h"
Brian13e3b212007-02-22 16:09:40 -070041#include "prog_execute.h"
42#include "prog_instruction.h"
43#include "prog_parameter.h"
44#include "prog_print.h"
Brian Paul702b5b02008-12-15 18:37:39 -070045#include "prog_noise.h"
Brian13e3b212007-02-22 16:09:40 -070046
47
Brian13e3b212007-02-22 16:09:40 -070048/* debug predicate */
49#define DEBUG_PROG 0
50
51
Brianf183a2d2007-02-23 17:14:30 -070052/**
53 * Set x to positive or negative infinity.
54 */
55#if defined(USE_IEEE) || defined(_WIN32)
56#define SET_POS_INFINITY(x) ( *((GLuint *) (void *)&x) = 0x7F800000 )
57#define SET_NEG_INFINITY(x) ( *((GLuint *) (void *)&x) = 0xFF800000 )
58#elif defined(VMS)
59#define SET_POS_INFINITY(x) x = __MAXFLOAT
60#define SET_NEG_INFINITY(x) x = -__MAXFLOAT
61#else
62#define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
63#define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
64#endif
65
66#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
67
68
69static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
70
71
72
Brian13e3b212007-02-22 16:09:40 -070073/**
74 * Return a pointer to the 4-element float vector specified by the given
75 * source register.
76 */
77static INLINE const GLfloat *
Brian Paulf4361542008-11-11 10:47:10 -070078get_src_register_pointer(const struct prog_src_register *source,
79 const struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -070080{
Brian Paulf4361542008-11-11 10:47:10 -070081 const struct gl_program *prog = machine->CurProgram;
82 GLint reg = source->Index;
83
Brianf183a2d2007-02-23 17:14:30 -070084 if (source->RelAddr) {
Brian Paulf4361542008-11-11 10:47:10 -070085 /* add address register value to src index/offset */
86 reg += machine->AddressReg[0][0];
87 if (reg < 0) {
88 return ZeroVec;
Brianf183a2d2007-02-23 17:14:30 -070089 }
90 }
91
Brian13e3b212007-02-22 16:09:40 -070092 switch (source->File) {
93 case PROGRAM_TEMPORARY:
Brian Paulf4361542008-11-11 10:47:10 -070094 if (reg >= MAX_PROGRAM_TEMPS)
95 return ZeroVec;
96 return machine->Temporaries[reg];
Brian13e3b212007-02-22 16:09:40 -070097
98 case PROGRAM_INPUT:
Brian Paulf4361542008-11-11 10:47:10 -070099 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
100 if (reg >= VERT_ATTRIB_MAX)
101 return ZeroVec;
102 return machine->VertAttribs[reg];
Brian13e3b212007-02-22 16:09:40 -0700103 }
104 else {
Brian Paulf4361542008-11-11 10:47:10 -0700105 if (reg >= FRAG_ATTRIB_MAX)
106 return ZeroVec;
107 return machine->Attribs[reg][machine->CurElement];
Brian13e3b212007-02-22 16:09:40 -0700108 }
109
110 case PROGRAM_OUTPUT:
Brian Paulf4361542008-11-11 10:47:10 -0700111 if (reg >= MAX_PROGRAM_OUTPUTS)
112 return ZeroVec;
113 return machine->Outputs[reg];
Brian13e3b212007-02-22 16:09:40 -0700114
115 case PROGRAM_LOCAL_PARAM:
Brian Paulf4361542008-11-11 10:47:10 -0700116 if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
117 return ZeroVec;
118 return machine->CurProgram->LocalParams[reg];
Brian13e3b212007-02-22 16:09:40 -0700119
120 case PROGRAM_ENV_PARAM:
Brian Paulf4361542008-11-11 10:47:10 -0700121 if (reg >= MAX_PROGRAM_ENV_PARAMS)
122 return ZeroVec;
123 return machine->EnvParams[reg];
Brian13e3b212007-02-22 16:09:40 -0700124
125 case PROGRAM_STATE_VAR:
126 /* Fallthrough */
127 case PROGRAM_CONSTANT:
128 /* Fallthrough */
129 case PROGRAM_UNIFORM:
130 /* Fallthrough */
131 case PROGRAM_NAMED_PARAM:
Brian Paulf4361542008-11-11 10:47:10 -0700132 if (reg >= (GLint) prog->Parameters->NumParameters)
133 return ZeroVec;
134 return prog->Parameters->ParameterValues[reg];
Brian13e3b212007-02-22 16:09:40 -0700135
136 default:
Brian33eac562007-02-25 18:52:41 -0700137 _mesa_problem(NULL,
Brian Paulf4361542008-11-11 10:47:10 -0700138 "Invalid src register file %d in get_src_register_pointer()",
139 source->File);
Brian13e3b212007-02-22 16:09:40 -0700140 return NULL;
141 }
142}
143
144
Brian Paulf4361542008-11-11 10:47:10 -0700145/**
146 * Return a pointer to the 4-element float vector specified by the given
147 * destination register.
148 */
149static INLINE GLfloat *
150get_dst_register_pointer(const struct prog_dst_register *dest,
151 struct gl_program_machine *machine)
152{
153 static GLfloat dummyReg[4];
154 GLint reg = dest->Index;
155
156 if (dest->RelAddr) {
157 /* add address register value to src index/offset */
158 reg += machine->AddressReg[0][0];
159 if (reg < 0) {
160 return dummyReg;
161 }
162 }
163
164 switch (dest->File) {
165 case PROGRAM_TEMPORARY:
166 if (reg >= MAX_PROGRAM_TEMPS)
167 return dummyReg;
168 return machine->Temporaries[reg];
169
170 case PROGRAM_OUTPUT:
171 if (reg >= MAX_PROGRAM_OUTPUTS)
172 return dummyReg;
173 return machine->Outputs[reg];
174
175 case PROGRAM_WRITE_ONLY:
176 return dummyReg;
177
178 default:
179 _mesa_problem(NULL,
180 "Invalid dest register file %d in get_dst_register_pointer()",
181 dest->File);
182 return NULL;
183 }
184}
185
186
187
Brian13e3b212007-02-22 16:09:40 -0700188/**
189 * Fetch a 4-element float vector from the given source register.
190 * Apply swizzling and negating as needed.
191 */
192static void
Brian33eac562007-02-25 18:52:41 -0700193fetch_vector4(const struct prog_src_register *source,
Briane80d9012007-02-23 16:53:24 -0700194 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700195{
Brian Paulf4361542008-11-11 10:47:10 -0700196 const GLfloat *src = get_src_register_pointer(source, machine);
Brian13e3b212007-02-22 16:09:40 -0700197 ASSERT(src);
198
199 if (source->Swizzle == SWIZZLE_NOOP) {
200 /* no swizzling */
201 COPY_4V(result, src);
202 }
203 else {
204 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
205 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
206 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
207 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
208 result[0] = src[GET_SWZ(source->Swizzle, 0)];
209 result[1] = src[GET_SWZ(source->Swizzle, 1)];
210 result[2] = src[GET_SWZ(source->Swizzle, 2)];
211 result[3] = src[GET_SWZ(source->Swizzle, 3)];
212 }
213
Brian13e3b212007-02-22 16:09:40 -0700214 if (source->Abs) {
215 result[0] = FABSF(result[0]);
216 result[1] = FABSF(result[1]);
217 result[2] = FABSF(result[2]);
218 result[3] = FABSF(result[3]);
219 }
Brian Paul7db7ff82009-04-14 22:14:30 -0600220 if (source->Negate) {
221 ASSERT(source->Negate == NEGATE_XYZW);
Brian13e3b212007-02-22 16:09:40 -0700222 result[0] = -result[0];
223 result[1] = -result[1];
224 result[2] = -result[2];
225 result[3] = -result[3];
226 }
Brian Paul92009542009-06-03 15:43:53 -0600227
228#ifdef NAN_CHECK
229 assert(!IS_INF_OR_NAN(result[0]));
230 assert(!IS_INF_OR_NAN(result[0]));
231 assert(!IS_INF_OR_NAN(result[0]));
232 assert(!IS_INF_OR_NAN(result[0]));
233#endif
Brian13e3b212007-02-22 16:09:40 -0700234}
235
Brian62da6a12007-05-02 18:44:34 -0600236
Brian13e3b212007-02-22 16:09:40 -0700237/**
Brian Paul37eef7b2008-11-07 09:33:55 -0700238 * Fetch a 4-element uint vector from the given source register.
239 * Apply swizzling but not negation/abs.
240 */
241static void
242fetch_vector4ui(const struct prog_src_register *source,
243 const struct gl_program_machine *machine, GLuint result[4])
244{
Brian Paulf4361542008-11-11 10:47:10 -0700245 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
Brian Paul37eef7b2008-11-07 09:33:55 -0700246 ASSERT(src);
247
248 if (source->Swizzle == SWIZZLE_NOOP) {
249 /* no swizzling */
250 COPY_4V(result, src);
251 }
252 else {
253 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
254 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
255 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
256 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
257 result[0] = src[GET_SWZ(source->Swizzle, 0)];
258 result[1] = src[GET_SWZ(source->Swizzle, 1)];
259 result[2] = src[GET_SWZ(source->Swizzle, 2)];
260 result[3] = src[GET_SWZ(source->Swizzle, 3)];
261 }
262
Brian Paul7db7ff82009-04-14 22:14:30 -0600263 /* Note: no Negate or Abs here */
Brian Paul37eef7b2008-11-07 09:33:55 -0700264}
265
266
267
268/**
Brian62da6a12007-05-02 18:44:34 -0600269 * Fetch the derivative with respect to X or Y for the given register.
270 * XXX this currently only works for fragment program input attribs.
Brian13e3b212007-02-22 16:09:40 -0700271 */
Brian62da6a12007-05-02 18:44:34 -0600272static void
Briane80d9012007-02-23 16:53:24 -0700273fetch_vector4_deriv(GLcontext * ctx,
274 const struct prog_src_register *source,
Brian62da6a12007-05-02 18:44:34 -0600275 const struct gl_program_machine *machine,
276 char xOrY, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700277{
Brian Paul37eef7b2008-11-07 09:33:55 -0700278 if (source->File == PROGRAM_INPUT &&
279 source->Index < (GLint) machine->NumDeriv) {
Brian62da6a12007-05-02 18:44:34 -0600280 const GLint col = machine->CurElement;
281 const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
282 const GLfloat invQ = 1.0f / w;
283 GLfloat deriv[4];
Brian13e3b212007-02-22 16:09:40 -0700284
Brian13e3b212007-02-22 16:09:40 -0700285 if (xOrY == 'X') {
Brian62da6a12007-05-02 18:44:34 -0600286 deriv[0] = machine->DerivX[source->Index][0] * invQ;
287 deriv[1] = machine->DerivX[source->Index][1] * invQ;
288 deriv[2] = machine->DerivX[source->Index][2] * invQ;
289 deriv[3] = machine->DerivX[source->Index][3] * invQ;
Brian13e3b212007-02-22 16:09:40 -0700290 }
291 else {
Brian62da6a12007-05-02 18:44:34 -0600292 deriv[0] = machine->DerivY[source->Index][0] * invQ;
293 deriv[1] = machine->DerivY[source->Index][1] * invQ;
294 deriv[2] = machine->DerivY[source->Index][2] * invQ;
295 deriv[3] = machine->DerivY[source->Index][3] * invQ;
Brian13e3b212007-02-22 16:09:40 -0700296 }
Brian13e3b212007-02-22 16:09:40 -0700297
Brian62da6a12007-05-02 18:44:34 -0600298 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
299 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
300 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
301 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
302
Brian62da6a12007-05-02 18:44:34 -0600303 if (source->Abs) {
304 result[0] = FABSF(result[0]);
305 result[1] = FABSF(result[1]);
306 result[2] = FABSF(result[2]);
307 result[3] = FABSF(result[3]);
308 }
Brian Paul7db7ff82009-04-14 22:14:30 -0600309 if (source->Negate) {
310 ASSERT(source->Negate == NEGATE_XYZW);
Brian62da6a12007-05-02 18:44:34 -0600311 result[0] = -result[0];
312 result[1] = -result[1];
313 result[2] = -result[2];
314 result[3] = -result[3];
315 }
Brian13e3b212007-02-22 16:09:40 -0700316 }
Brian62da6a12007-05-02 18:44:34 -0600317 else {
318 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
Brian13e3b212007-02-22 16:09:40 -0700319 }
Brian13e3b212007-02-22 16:09:40 -0700320}
Brian13e3b212007-02-22 16:09:40 -0700321
322
323/**
324 * As above, but only return result[0] element.
325 */
326static void
Brian33eac562007-02-25 18:52:41 -0700327fetch_vector1(const struct prog_src_register *source,
Briane80d9012007-02-23 16:53:24 -0700328 const struct gl_program_machine *machine, GLfloat result[4])
Brian13e3b212007-02-22 16:09:40 -0700329{
Brian Paulf4361542008-11-11 10:47:10 -0700330 const GLfloat *src = get_src_register_pointer(source, machine);
Brian13e3b212007-02-22 16:09:40 -0700331 ASSERT(src);
332
333 result[0] = src[GET_SWZ(source->Swizzle, 0)];
334
Brian13e3b212007-02-22 16:09:40 -0700335 if (source->Abs) {
336 result[0] = FABSF(result[0]);
337 }
Brian Paul7db7ff82009-04-14 22:14:30 -0600338 if (source->Negate) {
Brian13e3b212007-02-22 16:09:40 -0700339 result[0] = -result[0];
340 }
341}
342
343
344/**
Brian999b5562007-11-23 12:01:57 -0700345 * Fetch texel from texture. Use partial derivatives when possible.
346 */
347static INLINE void
348fetch_texel(GLcontext *ctx,
349 const struct gl_program_machine *machine,
350 const struct prog_instruction *inst,
351 const GLfloat texcoord[4], GLfloat lodBias,
352 GLfloat color[4])
353{
Brian Paulade50832008-05-14 16:09:46 -0600354 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
355
Brian999b5562007-11-23 12:01:57 -0700356 /* Note: we only have the right derivatives for fragment input attribs.
357 */
358 if (machine->NumDeriv > 0 &&
359 inst->SrcReg[0].File == PROGRAM_INPUT &&
360 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
361 /* simple texture fetch for which we should have derivatives */
362 GLuint attr = inst->SrcReg[0].Index;
363 machine->FetchTexelDeriv(ctx, texcoord,
364 machine->DerivX[attr],
365 machine->DerivY[attr],
Brian Paulade50832008-05-14 16:09:46 -0600366 lodBias, unit, color);
Brian999b5562007-11-23 12:01:57 -0700367 }
368 else {
Brian Paulade50832008-05-14 16:09:46 -0600369 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
Brian999b5562007-11-23 12:01:57 -0700370 }
371}
372
373
374/**
Brian13e3b212007-02-22 16:09:40 -0700375 * Test value against zero and return GT, LT, EQ or UN if NaN.
376 */
377static INLINE GLuint
Briane80d9012007-02-23 16:53:24 -0700378generate_cc(float value)
Brian13e3b212007-02-22 16:09:40 -0700379{
380 if (value != value)
Briane80d9012007-02-23 16:53:24 -0700381 return COND_UN; /* NaN */
Brian13e3b212007-02-22 16:09:40 -0700382 if (value > 0.0F)
383 return COND_GT;
384 if (value < 0.0F)
385 return COND_LT;
386 return COND_EQ;
387}
388
389
390/**
391 * Test if the ccMaskRule is satisfied by the given condition code.
392 * Used to mask destination writes according to the current condition code.
393 */
394static INLINE GLboolean
395test_cc(GLuint condCode, GLuint ccMaskRule)
396{
397 switch (ccMaskRule) {
398 case COND_EQ: return (condCode == COND_EQ);
399 case COND_NE: return (condCode != COND_EQ);
400 case COND_LT: return (condCode == COND_LT);
401 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
402 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
403 case COND_GT: return (condCode == COND_GT);
404 case COND_TR: return GL_TRUE;
405 case COND_FL: return GL_FALSE;
406 default: return GL_TRUE;
407 }
408}
409
410
411/**
412 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
413 * or GL_FALSE to indicate result.
414 */
415static INLINE GLboolean
416eval_condition(const struct gl_program_machine *machine,
417 const struct prog_instruction *inst)
418{
419 const GLuint swizzle = inst->DstReg.CondSwizzle;
420 const GLuint condMask = inst->DstReg.CondMask;
421 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
422 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
423 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
424 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
425 return GL_TRUE;
426 }
427 else {
428 return GL_FALSE;
429 }
430}
431
432
433
434/**
435 * Store 4 floats into a register. Observe the instructions saturate and
436 * set-condition-code flags.
437 */
438static void
Briane80d9012007-02-23 16:53:24 -0700439store_vector4(const struct prog_instruction *inst,
440 struct gl_program_machine *machine, const GLfloat value[4])
Brian13e3b212007-02-22 16:09:40 -0700441{
Brian Paulf4361542008-11-11 10:47:10 -0700442 const struct prog_dst_register *dstReg = &(inst->DstReg);
Brian13e3b212007-02-22 16:09:40 -0700443 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
Brian Paulf4361542008-11-11 10:47:10 -0700444 GLuint writeMask = dstReg->WriteMask;
Brian13e3b212007-02-22 16:09:40 -0700445 GLfloat clampedValue[4];
Brian Paulf4361542008-11-11 10:47:10 -0700446 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
Brian13e3b212007-02-22 16:09:40 -0700447
448#if 0
449 if (value[0] > 1.0e10 ||
450 IS_INF_OR_NAN(value[0]) ||
451 IS_INF_OR_NAN(value[1]) ||
Briane80d9012007-02-23 16:53:24 -0700452 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
Brian13e3b212007-02-22 16:09:40 -0700453 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
454#endif
455
456 if (clamp) {
457 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
458 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
459 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
460 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
461 value = clampedValue;
462 }
463
Brian Paulf4361542008-11-11 10:47:10 -0700464 if (dstReg->CondMask != COND_TR) {
Brian13e3b212007-02-22 16:09:40 -0700465 /* condition codes may turn off some writes */
466 if (writeMask & WRITEMASK_X) {
Brian Paulf4361542008-11-11 10:47:10 -0700467 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
468 dstReg->CondMask))
Brian13e3b212007-02-22 16:09:40 -0700469 writeMask &= ~WRITEMASK_X;
470 }
471 if (writeMask & WRITEMASK_Y) {
Brian Paulf4361542008-11-11 10:47:10 -0700472 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
473 dstReg->CondMask))
Brian13e3b212007-02-22 16:09:40 -0700474 writeMask &= ~WRITEMASK_Y;
475 }
476 if (writeMask & WRITEMASK_Z) {
Brian Paulf4361542008-11-11 10:47:10 -0700477 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
478 dstReg->CondMask))
Brian13e3b212007-02-22 16:09:40 -0700479 writeMask &= ~WRITEMASK_Z;
480 }
481 if (writeMask & WRITEMASK_W) {
Brian Paulf4361542008-11-11 10:47:10 -0700482 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
483 dstReg->CondMask))
Brian13e3b212007-02-22 16:09:40 -0700484 writeMask &= ~WRITEMASK_W;
485 }
486 }
487
Brian Paul92009542009-06-03 15:43:53 -0600488#ifdef NAN_CHECK
489 assert(!IS_INF_OR_NAN(value[0]));
490 assert(!IS_INF_OR_NAN(value[0]));
491 assert(!IS_INF_OR_NAN(value[0]));
492 assert(!IS_INF_OR_NAN(value[0]));
493#endif
494
Brian13e3b212007-02-22 16:09:40 -0700495 if (writeMask & WRITEMASK_X)
Brian Paulf4361542008-11-11 10:47:10 -0700496 dst[0] = value[0];
Brian13e3b212007-02-22 16:09:40 -0700497 if (writeMask & WRITEMASK_Y)
Brian Paulf4361542008-11-11 10:47:10 -0700498 dst[1] = value[1];
Brian13e3b212007-02-22 16:09:40 -0700499 if (writeMask & WRITEMASK_Z)
Brian Paulf4361542008-11-11 10:47:10 -0700500 dst[2] = value[2];
Brian13e3b212007-02-22 16:09:40 -0700501 if (writeMask & WRITEMASK_W)
Brian Paulf4361542008-11-11 10:47:10 -0700502 dst[3] = value[3];
Brian13e3b212007-02-22 16:09:40 -0700503
504 if (inst->CondUpdate) {
505 if (writeMask & WRITEMASK_X)
506 machine->CondCodes[0] = generate_cc(value[0]);
507 if (writeMask & WRITEMASK_Y)
508 machine->CondCodes[1] = generate_cc(value[1]);
509 if (writeMask & WRITEMASK_Z)
510 machine->CondCodes[2] = generate_cc(value[2]);
511 if (writeMask & WRITEMASK_W)
512 machine->CondCodes[3] = generate_cc(value[3]);
Briana01616e2007-03-28 11:01:28 -0600513#if DEBUG_PROG
514 printf("CondCodes=(%s,%s,%s,%s) for:\n",
515 _mesa_condcode_string(machine->CondCodes[0]),
516 _mesa_condcode_string(machine->CondCodes[1]),
517 _mesa_condcode_string(machine->CondCodes[2]),
518 _mesa_condcode_string(machine->CondCodes[3]));
519#endif
Brian13e3b212007-02-22 16:09:40 -0700520 }
521}
522
523
Brian13e3b212007-02-22 16:09:40 -0700524/**
Brian Paul37eef7b2008-11-07 09:33:55 -0700525 * Store 4 uints into a register. Observe the set-condition-code flags.
526 */
527static void
528store_vector4ui(const struct prog_instruction *inst,
529 struct gl_program_machine *machine, const GLuint value[4])
530{
Brian Paulf4361542008-11-11 10:47:10 -0700531 const struct prog_dst_register *dstReg = &(inst->DstReg);
532 GLuint writeMask = dstReg->WriteMask;
533 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
Brian Paul37eef7b2008-11-07 09:33:55 -0700534
Brian Paulf4361542008-11-11 10:47:10 -0700535 if (dstReg->CondMask != COND_TR) {
Brian Paul37eef7b2008-11-07 09:33:55 -0700536 /* condition codes may turn off some writes */
537 if (writeMask & WRITEMASK_X) {
Brian Paulf4361542008-11-11 10:47:10 -0700538 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
539 dstReg->CondMask))
Brian Paul37eef7b2008-11-07 09:33:55 -0700540 writeMask &= ~WRITEMASK_X;
541 }
542 if (writeMask & WRITEMASK_Y) {
Brian Paulf4361542008-11-11 10:47:10 -0700543 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
544 dstReg->CondMask))
Brian Paul37eef7b2008-11-07 09:33:55 -0700545 writeMask &= ~WRITEMASK_Y;
546 }
547 if (writeMask & WRITEMASK_Z) {
Brian Paulf4361542008-11-11 10:47:10 -0700548 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
549 dstReg->CondMask))
Brian Paul37eef7b2008-11-07 09:33:55 -0700550 writeMask &= ~WRITEMASK_Z;
551 }
552 if (writeMask & WRITEMASK_W) {
Brian Paulf4361542008-11-11 10:47:10 -0700553 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
554 dstReg->CondMask))
Brian Paul37eef7b2008-11-07 09:33:55 -0700555 writeMask &= ~WRITEMASK_W;
556 }
557 }
558
559 if (writeMask & WRITEMASK_X)
Brian Paulf4361542008-11-11 10:47:10 -0700560 dst[0] = value[0];
Brian Paul37eef7b2008-11-07 09:33:55 -0700561 if (writeMask & WRITEMASK_Y)
Brian Paulf4361542008-11-11 10:47:10 -0700562 dst[1] = value[1];
Brian Paul37eef7b2008-11-07 09:33:55 -0700563 if (writeMask & WRITEMASK_Z)
Brian Paulf4361542008-11-11 10:47:10 -0700564 dst[2] = value[2];
Brian Paul37eef7b2008-11-07 09:33:55 -0700565 if (writeMask & WRITEMASK_W)
Brian Paulf4361542008-11-11 10:47:10 -0700566 dst[3] = value[3];
Brian Paul37eef7b2008-11-07 09:33:55 -0700567
568 if (inst->CondUpdate) {
569 if (writeMask & WRITEMASK_X)
570 machine->CondCodes[0] = generate_cc(value[0]);
571 if (writeMask & WRITEMASK_Y)
572 machine->CondCodes[1] = generate_cc(value[1]);
573 if (writeMask & WRITEMASK_Z)
574 machine->CondCodes[2] = generate_cc(value[2]);
575 if (writeMask & WRITEMASK_W)
576 machine->CondCodes[3] = generate_cc(value[3]);
577#if DEBUG_PROG
578 printf("CondCodes=(%s,%s,%s,%s) for:\n",
579 _mesa_condcode_string(machine->CondCodes[0]),
580 _mesa_condcode_string(machine->CondCodes[1]),
581 _mesa_condcode_string(machine->CondCodes[2]),
582 _mesa_condcode_string(machine->CondCodes[3]));
583#endif
584 }
585}
586
587
588
589/**
Brian13e3b212007-02-22 16:09:40 -0700590 * Execute the given vertex/fragment program.
591 *
Brian3c1c9992007-02-25 19:11:44 -0700592 * \param ctx rendering context
593 * \param program the program to execute
594 * \param machine machine state (must be initialized)
Brian13e3b212007-02-22 16:09:40 -0700595 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
596 */
597GLboolean
Briane80d9012007-02-23 16:53:24 -0700598_mesa_execute_program(GLcontext * ctx,
Brian8b34b7d2007-02-25 18:26:50 -0700599 const struct gl_program *program,
Brian085d7d52007-02-25 18:23:37 -0700600 struct gl_program_machine *machine)
Brian13e3b212007-02-22 16:09:40 -0700601{
Brian8b34b7d2007-02-25 18:26:50 -0700602 const GLuint numInst = program->NumInstructions;
Briancfd00112007-02-25 18:30:45 -0700603 const GLuint maxExec = 10000;
José Fonseca452a5922008-05-31 18:14:09 +0900604 GLuint pc, numExec = 0;
Brian13e3b212007-02-22 16:09:40 -0700605
606 machine->CurProgram = program;
607
608 if (DEBUG_PROG) {
609 printf("execute program %u --------------------\n", program->Id);
610 }
611
Brian33eac562007-02-25 18:52:41 -0700612 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
613 machine->EnvParams = ctx->VertexProgram.Parameters;
614 }
615 else {
616 machine->EnvParams = ctx->FragmentProgram.Parameters;
617 }
618
Brian8b34b7d2007-02-25 18:26:50 -0700619 for (pc = 0; pc < numInst; pc++) {
Brian13e3b212007-02-22 16:09:40 -0700620 const struct prog_instruction *inst = program->Instructions + pc;
621
Brian13e3b212007-02-22 16:09:40 -0700622 if (DEBUG_PROG) {
623 _mesa_print_instruction(inst);
624 }
625
626 switch (inst->Opcode) {
Briane80d9012007-02-23 16:53:24 -0700627 case OPCODE_ABS:
628 {
629 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700630 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700631 result[0] = FABSF(a[0]);
632 result[1] = FABSF(a[1]);
633 result[2] = FABSF(a[2]);
634 result[3] = FABSF(a[3]);
635 store_vector4(inst, machine, result);
636 }
637 break;
638 case OPCODE_ADD:
639 {
640 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700641 fetch_vector4(&inst->SrcReg[0], machine, a);
642 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700643 result[0] = a[0] + b[0];
644 result[1] = a[1] + b[1];
645 result[2] = a[2] + b[2];
646 result[3] = a[3] + b[3];
647 store_vector4(inst, machine, result);
648 if (DEBUG_PROG) {
649 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
650 result[0], result[1], result[2], result[3],
651 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700652 }
Briane80d9012007-02-23 16:53:24 -0700653 }
654 break;
Brian Paul37eef7b2008-11-07 09:33:55 -0700655 case OPCODE_AND: /* bitwise AND */
656 {
657 GLuint a[4], b[4], result[4];
658 fetch_vector4ui(&inst->SrcReg[0], machine, a);
659 fetch_vector4ui(&inst->SrcReg[1], machine, b);
660 result[0] = a[0] & b[0];
661 result[1] = a[1] & b[1];
662 result[2] = a[2] & b[2];
663 result[3] = a[3] & b[3];
664 store_vector4ui(inst, machine, result);
665 }
666 break;
Brianf183a2d2007-02-23 17:14:30 -0700667 case OPCODE_ARL:
668 {
669 GLfloat t[4];
Brian33eac562007-02-25 18:52:41 -0700670 fetch_vector4(&inst->SrcReg[0], machine, t);
Brian Paula9475cc2008-12-12 18:03:48 -0700671 machine->AddressReg[0][0] = IFLOOR(t[0]);
Brianf183a2d2007-02-23 17:14:30 -0700672 }
673 break;
Briane80d9012007-02-23 16:53:24 -0700674 case OPCODE_BGNLOOP:
675 /* no-op */
676 break;
677 case OPCODE_ENDLOOP:
678 /* subtract 1 here since pc is incremented by for(pc) loop */
679 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
680 break;
681 case OPCODE_BGNSUB: /* begin subroutine */
682 break;
683 case OPCODE_ENDSUB: /* end subroutine */
684 break;
685 case OPCODE_BRA: /* branch (conditional) */
686 /* fall-through */
687 case OPCODE_BRK: /* break out of loop (conditional) */
688 /* fall-through */
689 case OPCODE_CONT: /* continue loop (conditional) */
690 if (eval_condition(machine, inst)) {
691 /* take branch */
692 /* Subtract 1 here since we'll do pc++ at end of for-loop */
693 pc = inst->BranchTarget - 1;
694 }
695 break;
696 case OPCODE_CAL: /* Call subroutine (conditional) */
697 if (eval_condition(machine, inst)) {
698 /* call the subroutine */
699 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
700 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -0700701 }
Briana0275b02007-03-27 11:02:20 -0600702 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
Brian31dc7a32007-03-27 15:21:35 -0600703 /* Subtract 1 here since we'll do pc++ at end of for-loop */
704 pc = inst->BranchTarget - 1;
Briane80d9012007-02-23 16:53:24 -0700705 }
706 break;
707 case OPCODE_CMP:
708 {
709 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700710 fetch_vector4(&inst->SrcReg[0], machine, a);
711 fetch_vector4(&inst->SrcReg[1], machine, b);
712 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -0700713 result[0] = a[0] < 0.0F ? b[0] : c[0];
714 result[1] = a[1] < 0.0F ? b[1] : c[1];
715 result[2] = a[2] < 0.0F ? b[2] : c[2];
716 result[3] = a[3] < 0.0F ? b[3] : c[3];
717 store_vector4(inst, machine, result);
718 }
719 break;
720 case OPCODE_COS:
721 {
722 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700723 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700724 result[0] = result[1] = result[2] = result[3]
725 = (GLfloat) _mesa_cos(a[0]);
726 store_vector4(inst, machine, result);
727 }
728 break;
729 case OPCODE_DDX: /* Partial derivative with respect to X */
730 {
Brian62da6a12007-05-02 18:44:34 -0600731 GLfloat result[4];
732 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
733 'X', result);
Briane80d9012007-02-23 16:53:24 -0700734 store_vector4(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -0700735 }
736 break;
737 case OPCODE_DDY: /* Partial derivative with respect to Y */
738 {
Brian62da6a12007-05-02 18:44:34 -0600739 GLfloat result[4];
740 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
741 'Y', result);
Briane80d9012007-02-23 16:53:24 -0700742 store_vector4(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -0700743 }
744 break;
Brian Paul65cb74e2008-11-07 09:41:00 -0700745 case OPCODE_DP2:
746 {
747 GLfloat a[4], b[4], result[4];
748 fetch_vector4(&inst->SrcReg[0], machine, a);
749 fetch_vector4(&inst->SrcReg[1], machine, b);
750 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
751 store_vector4(inst, machine, result);
752 if (DEBUG_PROG) {
753 printf("DP2 %g = (%g %g) . (%g %g)\n",
754 result[0], a[0], a[1], b[0], b[1]);
755 }
756 }
757 break;
758 case OPCODE_DP2A:
759 {
760 GLfloat a[4], b[4], c, result[4];
761 fetch_vector4(&inst->SrcReg[0], machine, a);
762 fetch_vector4(&inst->SrcReg[1], machine, b);
763 fetch_vector1(&inst->SrcReg[1], machine, &c);
764 result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
765 store_vector4(inst, machine, result);
766 if (DEBUG_PROG) {
767 printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
768 result[0], a[0], a[1], b[0], b[1], c);
769 }
770 }
771 break;
Briane80d9012007-02-23 16:53:24 -0700772 case OPCODE_DP3:
773 {
774 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700775 fetch_vector4(&inst->SrcReg[0], machine, a);
776 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700777 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
778 store_vector4(inst, machine, result);
779 if (DEBUG_PROG) {
780 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
781 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
Brian13e3b212007-02-22 16:09:40 -0700782 }
Briane80d9012007-02-23 16:53:24 -0700783 }
784 break;
785 case OPCODE_DP4:
786 {
787 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700788 fetch_vector4(&inst->SrcReg[0], machine, a);
789 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700790 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
791 store_vector4(inst, machine, result);
792 if (DEBUG_PROG) {
793 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
794 result[0], a[0], a[1], a[2], a[3],
795 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -0700796 }
Briane80d9012007-02-23 16:53:24 -0700797 }
798 break;
799 case OPCODE_DPH:
800 {
801 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700802 fetch_vector4(&inst->SrcReg[0], machine, a);
803 fetch_vector4(&inst->SrcReg[1], machine, b);
Brian Paul65cb74e2008-11-07 09:41:00 -0700804 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
Briane80d9012007-02-23 16:53:24 -0700805 store_vector4(inst, machine, result);
806 }
807 break;
808 case OPCODE_DST: /* Distance vector */
809 {
810 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700811 fetch_vector4(&inst->SrcReg[0], machine, a);
812 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -0700813 result[0] = 1.0F;
814 result[1] = a[1] * b[1];
815 result[2] = a[2];
816 result[3] = b[3];
817 store_vector4(inst, machine, result);
818 }
819 break;
Brianf183a2d2007-02-23 17:14:30 -0700820 case OPCODE_EXP:
Brianf183a2d2007-02-23 17:14:30 -0700821 {
822 GLfloat t[4], q[4], floor_t0;
Brian33eac562007-02-25 18:52:41 -0700823 fetch_vector1(&inst->SrcReg[0], machine, t);
Brianf183a2d2007-02-23 17:14:30 -0700824 floor_t0 = FLOORF(t[0]);
825 if (floor_t0 > FLT_MAX_EXP) {
826 SET_POS_INFINITY(q[0]);
827 SET_POS_INFINITY(q[2]);
828 }
829 else if (floor_t0 < FLT_MIN_EXP) {
830 q[0] = 0.0F;
831 q[2] = 0.0F;
832 }
833 else {
Brian761728a2007-02-24 11:14:57 -0700834 q[0] = LDEXPF(1.0, (int) floor_t0);
835 /* Note: GL_NV_vertex_program expects
836 * result.z = result.x * APPX(result.y)
837 * We do what the ARB extension says.
838 */
Brian Paulb8a200a2009-04-03 10:29:11 -0600839 q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
Brianf183a2d2007-02-23 17:14:30 -0700840 }
841 q[1] = t[0] - floor_t0;
842 q[3] = 1.0F;
843 store_vector4( inst, machine, q );
844 }
845 break;
Briane80d9012007-02-23 16:53:24 -0700846 case OPCODE_EX2: /* Exponential base 2 */
847 {
Brian Paul035de6a2009-06-03 15:42:52 -0600848 GLfloat a[4], result[4], val;
Brian33eac562007-02-25 18:52:41 -0700849 fetch_vector1(&inst->SrcReg[0], machine, a);
Brian Paul035de6a2009-06-03 15:42:52 -0600850 val = (GLfloat) _mesa_pow(2.0, a[0]);
851 /*
852 if (IS_INF_OR_NAN(val))
853 val = 1.0e10;
854 */
855 result[0] = result[1] = result[2] = result[3] = val;
Briane80d9012007-02-23 16:53:24 -0700856 store_vector4(inst, machine, result);
857 }
858 break;
859 case OPCODE_FLR:
860 {
861 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700862 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700863 result[0] = FLOORF(a[0]);
864 result[1] = FLOORF(a[1]);
865 result[2] = FLOORF(a[2]);
866 result[3] = FLOORF(a[3]);
867 store_vector4(inst, machine, result);
868 }
869 break;
870 case OPCODE_FRC:
871 {
872 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700873 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700874 result[0] = a[0] - FLOORF(a[0]);
875 result[1] = a[1] - FLOORF(a[1]);
876 result[2] = a[2] - FLOORF(a[2]);
877 result[3] = a[3] - FLOORF(a[3]);
878 store_vector4(inst, machine, result);
879 }
880 break;
881 case OPCODE_IF:
Brian63556fa2007-03-23 14:47:46 -0600882 {
883 GLboolean cond;
884 /* eval condition */
885 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
886 GLfloat a[4];
887 fetch_vector1(&inst->SrcReg[0], machine, a);
888 cond = (a[0] != 0.0);
889 }
890 else {
891 cond = eval_condition(machine, inst);
892 }
Briana7f73662007-04-20 08:11:51 -0600893 if (DEBUG_PROG) {
894 printf("IF: %d\n", cond);
895 }
Brian63556fa2007-03-23 14:47:46 -0600896 /* do if/else */
897 if (cond) {
898 /* do if-clause (just continue execution) */
899 }
900 else {
901 /* go to the instruction after ELSE or ENDIF */
902 assert(inst->BranchTarget >= 0);
903 pc = inst->BranchTarget - 1;
904 }
Briane80d9012007-02-23 16:53:24 -0700905 }
906 break;
907 case OPCODE_ELSE:
908 /* goto ENDIF */
909 assert(inst->BranchTarget >= 0);
910 pc = inst->BranchTarget - 1;
911 break;
912 case OPCODE_ENDIF:
913 /* nothing */
914 break;
Briane80d9012007-02-23 16:53:24 -0700915 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
916 if (eval_condition(machine, inst)) {
917 return GL_FALSE;
918 }
919 break;
920 case OPCODE_KIL: /* ARB_f_p only */
921 {
922 GLfloat a[4];
Brian33eac562007-02-25 18:52:41 -0700923 fetch_vector4(&inst->SrcReg[0], machine, a);
Brian Paulc0633dd2009-08-31 14:57:59 -0600924 if (DEBUG_PROG) {
925 printf("KIL if (%g %g %g %g) <= 0.0\n",
926 a[0], a[1], a[2], a[3]);
927 }
928
Briane80d9012007-02-23 16:53:24 -0700929 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
Brian13e3b212007-02-22 16:09:40 -0700930 return GL_FALSE;
931 }
Briane80d9012007-02-23 16:53:24 -0700932 }
933 break;
934 case OPCODE_LG2: /* log base 2 */
935 {
Brian Paul035de6a2009-06-03 15:42:52 -0600936 GLfloat a[4], result[4], val;
Brian33eac562007-02-25 18:52:41 -0700937 fetch_vector1(&inst->SrcReg[0], machine, a);
Ian Romanick962fa6b2008-12-18 14:11:06 -0800938 /* The fast LOG2 macro doesn't meet the precision requirements.
939 */
Brian Paul035de6a2009-06-03 15:42:52 -0600940 if (a[0] == 0.0F) {
Vinson Lee18883cd2009-10-01 13:33:20 -0600941 val = -FLT_MAX;
Brian Paul035de6a2009-06-03 15:42:52 -0600942 }
943 else {
944 val = log(a[0]) * 1.442695F;
945 }
946 result[0] = result[1] = result[2] = result[3] = val;
Briane80d9012007-02-23 16:53:24 -0700947 store_vector4(inst, machine, result);
948 }
949 break;
950 case OPCODE_LIT:
951 {
952 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
953 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -0700954 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -0700955 a[0] = MAX2(a[0], 0.0F);
956 a[1] = MAX2(a[1], 0.0F);
957 /* XXX ARB version clamps a[3], NV version doesn't */
958 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
959 result[0] = 1.0F;
960 result[1] = a[0];
961 /* XXX we could probably just use pow() here */
962 if (a[0] > 0.0F) {
963 if (a[1] == 0.0 && a[3] == 0.0)
964 result[2] = 1.0;
965 else
Brian Paulb8a200a2009-04-03 10:29:11 -0600966 result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
Brian13e3b212007-02-22 16:09:40 -0700967 }
Briane80d9012007-02-23 16:53:24 -0700968 else {
969 result[2] = 0.0;
Brian13e3b212007-02-22 16:09:40 -0700970 }
Briane80d9012007-02-23 16:53:24 -0700971 result[3] = 1.0F;
972 store_vector4(inst, machine, result);
973 if (DEBUG_PROG) {
974 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
975 result[0], result[1], result[2], result[3],
976 a[0], a[1], a[2], a[3]);
Brian13e3b212007-02-22 16:09:40 -0700977 }
Briane80d9012007-02-23 16:53:24 -0700978 }
979 break;
Brianf183a2d2007-02-23 17:14:30 -0700980 case OPCODE_LOG:
981 {
982 GLfloat t[4], q[4], abs_t0;
Brian33eac562007-02-25 18:52:41 -0700983 fetch_vector1(&inst->SrcReg[0], machine, t);
Brianf183a2d2007-02-23 17:14:30 -0700984 abs_t0 = FABSF(t[0]);
985 if (abs_t0 != 0.0F) {
986 /* Since we really can't handle infinite values on VMS
987 * like other OSes we'll use __MAXFLOAT to represent
988 * infinity. This may need some tweaking.
989 */
990#ifdef VMS
991 if (abs_t0 == __MAXFLOAT)
992#else
993 if (IS_INF_OR_NAN(abs_t0))
994#endif
995 {
996 SET_POS_INFINITY(q[0]);
997 q[1] = 1.0F;
998 SET_POS_INFINITY(q[2]);
999 }
1000 else {
1001 int exponent;
1002 GLfloat mantissa = FREXPF(t[0], &exponent);
1003 q[0] = (GLfloat) (exponent - 1);
1004 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
Ian Romanick962fa6b2008-12-18 14:11:06 -08001005
1006 /* The fast LOG2 macro doesn't meet the precision
1007 * requirements.
1008 */
1009 q[2] = (log(t[0]) * 1.442695F);
Brianf183a2d2007-02-23 17:14:30 -07001010 }
1011 }
1012 else {
1013 SET_NEG_INFINITY(q[0]);
1014 q[1] = 1.0F;
1015 SET_NEG_INFINITY(q[2]);
1016 }
1017 q[3] = 1.0;
1018 store_vector4(inst, machine, q);
1019 }
1020 break;
Briane80d9012007-02-23 16:53:24 -07001021 case OPCODE_LRP:
1022 {
1023 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001024 fetch_vector4(&inst->SrcReg[0], machine, a);
1025 fetch_vector4(&inst->SrcReg[1], machine, b);
1026 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001027 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1028 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1029 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1030 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1031 store_vector4(inst, machine, result);
1032 if (DEBUG_PROG) {
1033 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1034 "(%g %g %g %g), (%g %g %g %g)\n",
1035 result[0], result[1], result[2], result[3],
1036 a[0], a[1], a[2], a[3],
1037 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001038 }
Briane80d9012007-02-23 16:53:24 -07001039 }
1040 break;
1041 case OPCODE_MAD:
1042 {
1043 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001044 fetch_vector4(&inst->SrcReg[0], machine, a);
1045 fetch_vector4(&inst->SrcReg[1], machine, b);
1046 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001047 result[0] = a[0] * b[0] + c[0];
1048 result[1] = a[1] * b[1] + c[1];
1049 result[2] = a[2] * b[2] + c[2];
1050 result[3] = a[3] * b[3] + c[3];
1051 store_vector4(inst, machine, result);
1052 if (DEBUG_PROG) {
1053 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1054 "(%g %g %g %g) + (%g %g %g %g)\n",
1055 result[0], result[1], result[2], result[3],
1056 a[0], a[1], a[2], a[3],
1057 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
Brian13e3b212007-02-22 16:09:40 -07001058 }
Briane80d9012007-02-23 16:53:24 -07001059 }
1060 break;
1061 case OPCODE_MAX:
1062 {
1063 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001064 fetch_vector4(&inst->SrcReg[0], machine, a);
1065 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001066 result[0] = MAX2(a[0], b[0]);
1067 result[1] = MAX2(a[1], b[1]);
1068 result[2] = MAX2(a[2], b[2]);
1069 result[3] = MAX2(a[3], b[3]);
1070 store_vector4(inst, machine, result);
1071 if (DEBUG_PROG) {
1072 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1073 result[0], result[1], result[2], result[3],
1074 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001075 }
Briane80d9012007-02-23 16:53:24 -07001076 }
1077 break;
1078 case OPCODE_MIN:
1079 {
1080 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001081 fetch_vector4(&inst->SrcReg[0], machine, a);
1082 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001083 result[0] = MIN2(a[0], b[0]);
1084 result[1] = MIN2(a[1], b[1]);
1085 result[2] = MIN2(a[2], b[2]);
1086 result[3] = MIN2(a[3], b[3]);
1087 store_vector4(inst, machine, result);
1088 }
1089 break;
1090 case OPCODE_MOV:
1091 {
1092 GLfloat result[4];
Brian33eac562007-02-25 18:52:41 -07001093 fetch_vector4(&inst->SrcReg[0], machine, result);
Briane80d9012007-02-23 16:53:24 -07001094 store_vector4(inst, machine, result);
1095 if (DEBUG_PROG) {
1096 printf("MOV (%g %g %g %g)\n",
1097 result[0], result[1], result[2], result[3]);
Brian13e3b212007-02-22 16:09:40 -07001098 }
Briane80d9012007-02-23 16:53:24 -07001099 }
1100 break;
1101 case OPCODE_MUL:
1102 {
1103 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001104 fetch_vector4(&inst->SrcReg[0], machine, a);
1105 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001106 result[0] = a[0] * b[0];
1107 result[1] = a[1] * b[1];
1108 result[2] = a[2] * b[2];
1109 result[3] = a[3] * b[3];
1110 store_vector4(inst, machine, result);
1111 if (DEBUG_PROG) {
1112 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1113 result[0], result[1], result[2], result[3],
1114 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001115 }
Briane80d9012007-02-23 16:53:24 -07001116 }
1117 break;
1118 case OPCODE_NOISE1:
1119 {
1120 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001121 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001122 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001123 result[1] =
Brian Paul702b5b02008-12-15 18:37:39 -07001124 result[2] =
1125 result[3] = _mesa_noise1(a[0]);
Briane80d9012007-02-23 16:53:24 -07001126 store_vector4(inst, machine, result);
1127 }
1128 break;
1129 case OPCODE_NOISE2:
1130 {
1131 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001132 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001133 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001134 result[1] =
Brian Paul702b5b02008-12-15 18:37:39 -07001135 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
Briane80d9012007-02-23 16:53:24 -07001136 store_vector4(inst, machine, result);
1137 }
1138 break;
1139 case OPCODE_NOISE3:
1140 {
1141 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001142 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001143 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001144 result[1] =
1145 result[2] =
Brian Paul702b5b02008-12-15 18:37:39 -07001146 result[3] = _mesa_noise3(a[0], a[1], a[2]);
Briane80d9012007-02-23 16:53:24 -07001147 store_vector4(inst, machine, result);
1148 }
1149 break;
1150 case OPCODE_NOISE4:
1151 {
1152 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001153 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001154 result[0] =
Brian13e3b212007-02-22 16:09:40 -07001155 result[1] =
1156 result[2] =
Brian Paul702b5b02008-12-15 18:37:39 -07001157 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
Briane80d9012007-02-23 16:53:24 -07001158 store_vector4(inst, machine, result);
1159 }
1160 break;
1161 case OPCODE_NOP:
1162 break;
Brian Paul37eef7b2008-11-07 09:33:55 -07001163 case OPCODE_NOT: /* bitwise NOT */
1164 {
1165 GLuint a[4], result[4];
1166 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1167 result[0] = ~a[0];
1168 result[1] = ~a[1];
1169 result[2] = ~a[2];
1170 result[3] = ~a[3];
1171 store_vector4ui(inst, machine, result);
1172 }
1173 break;
Brian Paulf6ead502008-11-07 08:51:31 -07001174 case OPCODE_NRM3: /* 3-component normalization */
1175 {
1176 GLfloat a[4], result[4];
1177 GLfloat tmp;
1178 fetch_vector4(&inst->SrcReg[0], machine, a);
1179 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1180 if (tmp != 0.0F)
Brian Paul22459e72008-11-07 12:59:36 -07001181 tmp = INV_SQRTF(tmp);
Brian Paulf6ead502008-11-07 08:51:31 -07001182 result[0] = tmp * a[0];
1183 result[1] = tmp * a[1];
1184 result[2] = tmp * a[2];
1185 result[3] = 0.0; /* undefined, but prevent valgrind warnings */
1186 store_vector4(inst, machine, result);
1187 }
1188 break;
1189 case OPCODE_NRM4: /* 4-component normalization */
1190 {
1191 GLfloat a[4], result[4];
1192 GLfloat tmp;
1193 fetch_vector4(&inst->SrcReg[0], machine, a);
1194 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1195 if (tmp != 0.0F)
Brian Paul22459e72008-11-07 12:59:36 -07001196 tmp = INV_SQRTF(tmp);
Brian Paulf6ead502008-11-07 08:51:31 -07001197 result[0] = tmp * a[0];
1198 result[1] = tmp * a[1];
1199 result[2] = tmp * a[2];
1200 result[3] = tmp * a[3];
1201 store_vector4(inst, machine, result);
1202 }
1203 break;
Brian Paul37eef7b2008-11-07 09:33:55 -07001204 case OPCODE_OR: /* bitwise OR */
1205 {
1206 GLuint a[4], b[4], result[4];
1207 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1208 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1209 result[0] = a[0] | b[0];
1210 result[1] = a[1] | b[1];
1211 result[2] = a[2] | b[2];
1212 result[3] = a[3] | b[3];
1213 store_vector4ui(inst, machine, result);
1214 }
1215 break;
Briane80d9012007-02-23 16:53:24 -07001216 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1217 {
Brian Paul37eef7b2008-11-07 09:33:55 -07001218 GLfloat a[4];
1219 GLuint result[4];
Briane80d9012007-02-23 16:53:24 -07001220 GLhalfNV hx, hy;
Brian33eac562007-02-25 18:52:41 -07001221 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001222 hx = _mesa_float_to_half(a[0]);
1223 hy = _mesa_float_to_half(a[1]);
Brian Paul37eef7b2008-11-07 09:33:55 -07001224 result[0] =
1225 result[1] =
1226 result[2] =
1227 result[3] = hx | (hy << 16);
1228 store_vector4ui(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -07001229 }
1230 break;
1231 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1232 {
Brian Paul37eef7b2008-11-07 09:33:55 -07001233 GLfloat a[4];
1234 GLuint result[4], usx, usy;
Brian33eac562007-02-25 18:52:41 -07001235 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001236 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1237 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1238 usx = IROUND(a[0] * 65535.0F);
1239 usy = IROUND(a[1] * 65535.0F);
Brian Paul37eef7b2008-11-07 09:33:55 -07001240 result[0] =
1241 result[1] =
1242 result[2] =
1243 result[3] = usx | (usy << 16);
1244 store_vector4ui(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -07001245 }
1246 break;
1247 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1248 {
Brian Paul37eef7b2008-11-07 09:33:55 -07001249 GLfloat a[4];
1250 GLuint result[4], ubx, uby, ubz, ubw;
Brian33eac562007-02-25 18:52:41 -07001251 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001252 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1253 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1254 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1255 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1256 ubx = IROUND(127.0F * a[0] + 128.0F);
1257 uby = IROUND(127.0F * a[1] + 128.0F);
1258 ubz = IROUND(127.0F * a[2] + 128.0F);
1259 ubw = IROUND(127.0F * a[3] + 128.0F);
Brian Paul37eef7b2008-11-07 09:33:55 -07001260 result[0] =
1261 result[1] =
1262 result[2] =
1263 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1264 store_vector4ui(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -07001265 }
1266 break;
1267 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1268 {
Brian Paul37eef7b2008-11-07 09:33:55 -07001269 GLfloat a[4];
1270 GLuint result[4], ubx, uby, ubz, ubw;
Brian33eac562007-02-25 18:52:41 -07001271 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001272 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1273 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1274 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1275 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1276 ubx = IROUND(255.0F * a[0]);
1277 uby = IROUND(255.0F * a[1]);
1278 ubz = IROUND(255.0F * a[2]);
1279 ubw = IROUND(255.0F * a[3]);
Brian Paul37eef7b2008-11-07 09:33:55 -07001280 result[0] =
1281 result[1] =
1282 result[2] =
1283 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1284 store_vector4ui(inst, machine, result);
Briane80d9012007-02-23 16:53:24 -07001285 }
1286 break;
1287 case OPCODE_POW:
1288 {
1289 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001290 fetch_vector1(&inst->SrcReg[0], machine, a);
1291 fetch_vector1(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001292 result[0] = result[1] = result[2] = result[3]
1293 = (GLfloat) _mesa_pow(a[0], b[0]);
1294 store_vector4(inst, machine, result);
1295 }
1296 break;
1297 case OPCODE_RCP:
1298 {
1299 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001300 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001301 if (DEBUG_PROG) {
1302 if (a[0] == 0)
1303 printf("RCP(0)\n");
1304 else if (IS_INF_OR_NAN(a[0]))
1305 printf("RCP(inf)\n");
Brian13e3b212007-02-22 16:09:40 -07001306 }
Briane80d9012007-02-23 16:53:24 -07001307 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1308 store_vector4(inst, machine, result);
1309 }
1310 break;
1311 case OPCODE_RET: /* return from subroutine (conditional) */
1312 if (eval_condition(machine, inst)) {
1313 if (machine->StackDepth == 0) {
1314 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
Brian13e3b212007-02-22 16:09:40 -07001315 }
Briana0275b02007-03-27 11:02:20 -06001316 /* subtract one because of pc++ in the for loop */
1317 pc = machine->CallStack[--machine->StackDepth] - 1;
Briane80d9012007-02-23 16:53:24 -07001318 }
1319 break;
1320 case OPCODE_RFL: /* reflection vector */
1321 {
1322 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
Brian33eac562007-02-25 18:52:41 -07001323 fetch_vector4(&inst->SrcReg[0], machine, axis);
1324 fetch_vector4(&inst->SrcReg[1], machine, dir);
Briane80d9012007-02-23 16:53:24 -07001325 tmpW = DOT3(axis, axis);
1326 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1327 result[0] = tmpX * axis[0] - dir[0];
1328 result[1] = tmpX * axis[1] - dir[1];
1329 result[2] = tmpX * axis[2] - dir[2];
1330 /* result[3] is never written! XXX enforce in parser! */
1331 store_vector4(inst, machine, result);
1332 }
1333 break;
1334 case OPCODE_RSQ: /* 1 / sqrt() */
1335 {
1336 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001337 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001338 a[0] = FABSF(a[0]);
1339 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1340 store_vector4(inst, machine, result);
1341 if (DEBUG_PROG) {
1342 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
Brian13e3b212007-02-22 16:09:40 -07001343 }
Briane80d9012007-02-23 16:53:24 -07001344 }
1345 break;
1346 case OPCODE_SCS: /* sine and cos */
1347 {
1348 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001349 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001350 result[0] = (GLfloat) _mesa_cos(a[0]);
1351 result[1] = (GLfloat) _mesa_sin(a[0]);
1352 result[2] = 0.0; /* undefined! */
1353 result[3] = 0.0; /* undefined! */
1354 store_vector4(inst, machine, result);
1355 }
1356 break;
1357 case OPCODE_SEQ: /* set on equal */
1358 {
1359 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001360 fetch_vector4(&inst->SrcReg[0], machine, a);
1361 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001362 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1363 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1364 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1365 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1366 store_vector4(inst, machine, result);
Brian28ab1122007-03-06 12:15:30 -07001367 if (DEBUG_PROG) {
1368 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1369 result[0], result[1], result[2], result[3],
1370 a[0], a[1], a[2], a[3],
1371 b[0], b[1], b[2], b[3]);
1372 }
Briane80d9012007-02-23 16:53:24 -07001373 }
1374 break;
1375 case OPCODE_SFL: /* set false, operands ignored */
1376 {
1377 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1378 store_vector4(inst, machine, result);
1379 }
1380 break;
1381 case OPCODE_SGE: /* set on greater or equal */
1382 {
1383 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001384 fetch_vector4(&inst->SrcReg[0], machine, a);
1385 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001386 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1387 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1388 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1389 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1390 store_vector4(inst, machine, result);
Brian28ab1122007-03-06 12:15:30 -07001391 if (DEBUG_PROG) {
1392 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1393 result[0], result[1], result[2], result[3],
1394 a[0], a[1], a[2], a[3],
1395 b[0], b[1], b[2], b[3]);
1396 }
Briane80d9012007-02-23 16:53:24 -07001397 }
1398 break;
1399 case OPCODE_SGT: /* set on greater */
1400 {
1401 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001402 fetch_vector4(&inst->SrcReg[0], machine, a);
1403 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001404 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1405 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1406 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1407 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1408 store_vector4(inst, machine, result);
1409 if (DEBUG_PROG) {
Brian28ab1122007-03-06 12:15:30 -07001410 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1411 result[0], result[1], result[2], result[3],
1412 a[0], a[1], a[2], a[3],
1413 b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001414 }
Briane80d9012007-02-23 16:53:24 -07001415 }
1416 break;
1417 case OPCODE_SIN:
1418 {
1419 GLfloat a[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001420 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001421 result[0] = result[1] = result[2] = result[3]
1422 = (GLfloat) _mesa_sin(a[0]);
1423 store_vector4(inst, machine, result);
1424 }
1425 break;
1426 case OPCODE_SLE: /* set on less or equal */
1427 {
1428 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001429 fetch_vector4(&inst->SrcReg[0], machine, a);
1430 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001431 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1432 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1433 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1434 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1435 store_vector4(inst, machine, result);
Brian28ab1122007-03-06 12:15:30 -07001436 if (DEBUG_PROG) {
1437 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1438 result[0], result[1], result[2], result[3],
1439 a[0], a[1], a[2], a[3],
1440 b[0], b[1], b[2], b[3]);
1441 }
Briane80d9012007-02-23 16:53:24 -07001442 }
1443 break;
1444 case OPCODE_SLT: /* set on less */
1445 {
1446 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001447 fetch_vector4(&inst->SrcReg[0], machine, a);
1448 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001449 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1450 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1451 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1452 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1453 store_vector4(inst, machine, result);
Brian28ab1122007-03-06 12:15:30 -07001454 if (DEBUG_PROG) {
1455 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1456 result[0], result[1], result[2], result[3],
1457 a[0], a[1], a[2], a[3],
1458 b[0], b[1], b[2], b[3]);
1459 }
Briane80d9012007-02-23 16:53:24 -07001460 }
1461 break;
1462 case OPCODE_SNE: /* set on not equal */
1463 {
1464 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001465 fetch_vector4(&inst->SrcReg[0], machine, a);
1466 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001467 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1468 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1469 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1470 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1471 store_vector4(inst, machine, result);
Brian28ab1122007-03-06 12:15:30 -07001472 if (DEBUG_PROG) {
1473 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1474 result[0], result[1], result[2], result[3],
1475 a[0], a[1], a[2], a[3],
1476 b[0], b[1], b[2], b[3]);
1477 }
Briane80d9012007-02-23 16:53:24 -07001478 }
1479 break;
Brian Paulf6ead502008-11-07 08:51:31 -07001480 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1481 {
1482 GLfloat a[4], result[4];
1483 fetch_vector4(&inst->SrcReg[0], machine, a);
1484 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1485 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1486 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1487 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1488 store_vector4(inst, machine, result);
1489 }
1490 break;
Briane80d9012007-02-23 16:53:24 -07001491 case OPCODE_STR: /* set true, operands ignored */
1492 {
1493 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1494 store_vector4(inst, machine, result);
1495 }
1496 break;
1497 case OPCODE_SUB:
1498 {
1499 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001500 fetch_vector4(&inst->SrcReg[0], machine, a);
1501 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001502 result[0] = a[0] - b[0];
1503 result[1] = a[1] - b[1];
1504 result[2] = a[2] - b[2];
1505 result[3] = a[3] - b[3];
1506 store_vector4(inst, machine, result);
1507 if (DEBUG_PROG) {
1508 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1509 result[0], result[1], result[2], result[3],
1510 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
Brian13e3b212007-02-22 16:09:40 -07001511 }
Briane80d9012007-02-23 16:53:24 -07001512 }
1513 break;
1514 case OPCODE_SWZ: /* extended swizzle */
1515 {
1516 const struct prog_src_register *source = &inst->SrcReg[0];
Brian Paulf4361542008-11-11 10:47:10 -07001517 const GLfloat *src = get_src_register_pointer(source, machine);
Briane80d9012007-02-23 16:53:24 -07001518 GLfloat result[4];
1519 GLuint i;
1520 for (i = 0; i < 4; i++) {
1521 const GLuint swz = GET_SWZ(source->Swizzle, i);
1522 if (swz == SWIZZLE_ZERO)
1523 result[i] = 0.0;
1524 else if (swz == SWIZZLE_ONE)
1525 result[i] = 1.0;
Brian13e3b212007-02-22 16:09:40 -07001526 else {
Briane80d9012007-02-23 16:53:24 -07001527 ASSERT(swz >= 0);
1528 ASSERT(swz <= 3);
1529 result[i] = src[swz];
Brian13e3b212007-02-22 16:09:40 -07001530 }
Brian Paul7db7ff82009-04-14 22:14:30 -06001531 if (source->Negate & (1 << i))
Briane80d9012007-02-23 16:53:24 -07001532 result[i] = -result[i];
Brian13e3b212007-02-22 16:09:40 -07001533 }
Briane80d9012007-02-23 16:53:24 -07001534 store_vector4(inst, machine, result);
1535 }
1536 break;
1537 case OPCODE_TEX: /* Both ARB and NV frag prog */
Brian999b5562007-11-23 12:01:57 -07001538 /* Simple texel lookup */
Briane80d9012007-02-23 16:53:24 -07001539 {
Brian999b5562007-11-23 12:01:57 -07001540 GLfloat texcoord[4], color[4];
1541 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1542
1543 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1544
Briane80d9012007-02-23 16:53:24 -07001545 if (DEBUG_PROG) {
Brian999b5562007-11-23 12:01:57 -07001546 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
Briane80d9012007-02-23 16:53:24 -07001547 color[0], color[1], color[2], color[3],
1548 inst->TexSrcUnit,
Brian999b5562007-11-23 12:01:57 -07001549 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
Briane80d9012007-02-23 16:53:24 -07001550 }
1551 store_vector4(inst, machine, color);
1552 }
1553 break;
1554 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1555 /* Texel lookup with LOD bias */
1556 {
Brian999b5562007-11-23 12:01:57 -07001557 GLfloat texcoord[4], color[4], lodBias;
1558
1559 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1560
1561 /* texcoord[3] is the bias to add to lambda */
Brian Paul890f37d2009-09-23 13:34:30 -06001562 lodBias = texcoord[3];
Brian999b5562007-11-23 12:01:57 -07001563
1564 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1565
Briane80d9012007-02-23 16:53:24 -07001566 store_vector4(inst, machine, color);
1567 }
1568 break;
1569 case OPCODE_TXD: /* GL_NV_fragment_program only */
1570 /* Texture lookup w/ partial derivatives for LOD */
1571 {
1572 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
Brian33eac562007-02-25 18:52:41 -07001573 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1574 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1575 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
Briane80d9012007-02-23 16:53:24 -07001576 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
Brian999b5562007-11-23 12:01:57 -07001577 0.0, /* lodBias */
Briane80d9012007-02-23 16:53:24 -07001578 inst->TexSrcUnit, color);
1579 store_vector4(inst, machine, color);
1580 }
1581 break;
1582 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1583 /* Texture lookup w/ projective divide */
1584 {
Brian999b5562007-11-23 12:01:57 -07001585 GLfloat texcoord[4], color[4];
1586
Brian33eac562007-02-25 18:52:41 -07001587 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
Briane80d9012007-02-23 16:53:24 -07001588 /* Not so sure about this test - if texcoord[3] is
1589 * zero, we'd probably be fine except for an ASSERT in
1590 * IROUND_POS() which gets triggered by the inf values created.
1591 */
1592 if (texcoord[3] != 0.0) {
1593 texcoord[0] /= texcoord[3];
1594 texcoord[1] /= texcoord[3];
1595 texcoord[2] /= texcoord[3];
1596 }
Brian999b5562007-11-23 12:01:57 -07001597
1598 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1599
Briane80d9012007-02-23 16:53:24 -07001600 store_vector4(inst, machine, color);
1601 }
1602 break;
1603 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
Brian999b5562007-11-23 12:01:57 -07001604 /* Texture lookup w/ projective divide, as above, but do not
1605 * do the divide by w if sampling from a cube map.
1606 */
Briane80d9012007-02-23 16:53:24 -07001607 {
Brian999b5562007-11-23 12:01:57 -07001608 GLfloat texcoord[4], color[4];
1609
Brian33eac562007-02-25 18:52:41 -07001610 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
Briane80d9012007-02-23 16:53:24 -07001611 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1612 texcoord[3] != 0.0) {
1613 texcoord[0] /= texcoord[3];
1614 texcoord[1] /= texcoord[3];
1615 texcoord[2] /= texcoord[3];
1616 }
Brian999b5562007-11-23 12:01:57 -07001617
1618 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1619
Briane80d9012007-02-23 16:53:24 -07001620 store_vector4(inst, machine, color);
1621 }
1622 break;
Brian Paul035c0cf2008-11-06 17:14:33 -07001623 case OPCODE_TRUNC: /* truncate toward zero */
1624 {
1625 GLfloat a[4], result[4];
1626 fetch_vector4(&inst->SrcReg[0], machine, a);
1627 result[0] = (GLfloat) (GLint) a[0];
1628 result[1] = (GLfloat) (GLint) a[1];
1629 result[2] = (GLfloat) (GLint) a[2];
1630 result[3] = (GLfloat) (GLint) a[3];
1631 store_vector4(inst, machine, result);
1632 }
1633 break;
Briane80d9012007-02-23 16:53:24 -07001634 case OPCODE_UP2H: /* unpack two 16-bit floats */
1635 {
1636 GLfloat a[4], result[4];
1637 const GLuint *rawBits = (const GLuint *) a;
1638 GLhalfNV hx, hy;
Brian33eac562007-02-25 18:52:41 -07001639 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001640 hx = rawBits[0] & 0xffff;
1641 hy = rawBits[0] >> 16;
1642 result[0] = result[2] = _mesa_half_to_float(hx);
1643 result[1] = result[3] = _mesa_half_to_float(hy);
1644 store_vector4(inst, machine, result);
1645 }
1646 break;
1647 case OPCODE_UP2US: /* unpack two GLushorts */
1648 {
1649 GLfloat a[4], result[4];
1650 const GLuint *rawBits = (const GLuint *) a;
1651 GLushort usx, usy;
Brian33eac562007-02-25 18:52:41 -07001652 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001653 usx = rawBits[0] & 0xffff;
1654 usy = rawBits[0] >> 16;
1655 result[0] = result[2] = usx * (1.0f / 65535.0f);
1656 result[1] = result[3] = usy * (1.0f / 65535.0f);
1657 store_vector4(inst, machine, result);
1658 }
1659 break;
1660 case OPCODE_UP4B: /* unpack four GLbytes */
1661 {
1662 GLfloat a[4], result[4];
1663 const GLuint *rawBits = (const GLuint *) a;
Brian33eac562007-02-25 18:52:41 -07001664 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001665 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1666 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1667 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1668 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1669 store_vector4(inst, machine, result);
1670 }
1671 break;
1672 case OPCODE_UP4UB: /* unpack four GLubytes */
1673 {
1674 GLfloat a[4], result[4];
1675 const GLuint *rawBits = (const GLuint *) a;
Brian33eac562007-02-25 18:52:41 -07001676 fetch_vector1(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001677 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1678 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1679 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1680 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1681 store_vector4(inst, machine, result);
1682 }
1683 break;
Brian Paul37eef7b2008-11-07 09:33:55 -07001684 case OPCODE_XOR: /* bitwise XOR */
1685 {
1686 GLuint a[4], b[4], result[4];
1687 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1688 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1689 result[0] = a[0] ^ b[0];
1690 result[1] = a[1] ^ b[1];
1691 result[2] = a[2] ^ b[2];
1692 result[3] = a[3] ^ b[3];
1693 store_vector4ui(inst, machine, result);
1694 }
1695 break;
Briane80d9012007-02-23 16:53:24 -07001696 case OPCODE_XPD: /* cross product */
1697 {
1698 GLfloat a[4], b[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001699 fetch_vector4(&inst->SrcReg[0], machine, a);
1700 fetch_vector4(&inst->SrcReg[1], machine, b);
Briane80d9012007-02-23 16:53:24 -07001701 result[0] = a[1] * b[2] - a[2] * b[1];
1702 result[1] = a[2] * b[0] - a[0] * b[2];
1703 result[2] = a[0] * b[1] - a[1] * b[0];
1704 result[3] = 1.0;
1705 store_vector4(inst, machine, result);
Brian9637c962007-03-07 17:40:57 -07001706 if (DEBUG_PROG) {
1707 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1708 result[0], result[1], result[2], result[3],
1709 a[0], a[1], a[2], b[0], b[1], b[2]);
1710 }
Briane80d9012007-02-23 16:53:24 -07001711 }
1712 break;
1713 case OPCODE_X2D: /* 2-D matrix transform */
1714 {
1715 GLfloat a[4], b[4], c[4], result[4];
Brian33eac562007-02-25 18:52:41 -07001716 fetch_vector4(&inst->SrcReg[0], machine, a);
1717 fetch_vector4(&inst->SrcReg[1], machine, b);
1718 fetch_vector4(&inst->SrcReg[2], machine, c);
Briane80d9012007-02-23 16:53:24 -07001719 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1720 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1721 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1722 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1723 store_vector4(inst, machine, result);
1724 }
1725 break;
1726 case OPCODE_PRINT:
1727 {
1728 if (inst->SrcReg[0].File != -1) {
1729 GLfloat a[4];
Brian33eac562007-02-25 18:52:41 -07001730 fetch_vector4(&inst->SrcReg[0], machine, a);
Briane80d9012007-02-23 16:53:24 -07001731 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1732 a[0], a[1], a[2], a[3]);
1733 }
1734 else {
1735 _mesa_printf("%s\n", (const char *) inst->Data);
1736 }
1737 }
1738 break;
1739 case OPCODE_END:
1740 return GL_TRUE;
1741 default:
Alan Hourihane936dba12008-04-22 20:28:35 +01001742 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
Briane80d9012007-02-23 16:53:24 -07001743 inst->Opcode);
1744 return GL_TRUE; /* return value doesn't matter */
Brian13e3b212007-02-22 16:09:40 -07001745 }
Briane80d9012007-02-23 16:53:24 -07001746
Briancfd00112007-02-25 18:30:45 -07001747 numExec++;
1748 if (numExec > maxExec) {
Brian13e3b212007-02-22 16:09:40 -07001749 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1750 return GL_TRUE;
Brian13e3b212007-02-22 16:09:40 -07001751 }
Briane80d9012007-02-23 16:53:24 -07001752
1753 } /* for pc */
Brian13e3b212007-02-22 16:09:40 -07001754
Brian13e3b212007-02-22 16:09:40 -07001755 return GL_TRUE;
1756}