| /* |
| * Mesa 3-D graphics library |
| * Version: 6.0.1 |
| * |
| * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
| * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file nvvertexec.c |
| * Code to execute vertex programs. |
| * \author Brian Paul |
| */ |
| |
| #include "glheader.h" |
| #include "context.h" |
| #include "imports.h" |
| #include "macros.h" |
| #include "mtypes.h" |
| #include "nvvertexec.h" |
| #include "nvvertprog.h" |
| #include "program.h" |
| #include "math/m_matrix.h" |
| |
| |
| static const GLfloat zeroVec[4] = { 0, 0, 0, 0 }; |
| |
| |
| /** |
| * Load/initialize the vertex program registers. |
| * This needs to be done per vertex. |
| */ |
| void |
| _mesa_init_vp_registers(GLcontext *ctx) |
| { |
| GLuint i; |
| |
| /* Input registers get initialized from the current vertex attribs */ |
| MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib, |
| VERT_ATTRIB_MAX * 4 * sizeof(GLfloat)); |
| |
| /* Output and temp regs are initialized to [0,0,0,1] */ |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) { |
| ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F); |
| } |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) { |
| ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F); |
| } |
| |
| /* The program parameters aren't touched */ |
| /* XXX: This should be moved to glBegin() time, but its safe (and slow!) |
| * here - Karl |
| */ |
| if (ctx->VertexProgram.Current->Parameters) { |
| /* Grab the state */ |
| _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters); |
| |
| /* And copy it into the program state */ |
| for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) { |
| MEMCPY(ctx->VertexProgram.Parameters[i], |
| &ctx->VertexProgram.Current->Parameters->Parameters[i].Values, |
| 4*sizeof(GLfloat)); |
| } |
| } |
| } |
| |
| |
| |
| /** |
| * Copy the 16 elements of a matrix into four consecutive program |
| * registers starting at 'pos'. |
| */ |
| static void |
| load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16]) |
| { |
| GLuint i; |
| for (i = 0; i < 4; i++) { |
| registers[pos + i][0] = mat[0 + i]; |
| registers[pos + i][1] = mat[4 + i]; |
| registers[pos + i][2] = mat[8 + i]; |
| registers[pos + i][3] = mat[12 + i]; |
| } |
| } |
| |
| |
| /** |
| * As above, but transpose the matrix. |
| */ |
| static void |
| load_transpose_matrix(GLfloat registers[][4], GLuint pos, |
| const GLfloat mat[16]) |
| { |
| MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat)); |
| } |
| |
| |
| /** |
| * Load all currently tracked matrices into the program registers. |
| * This needs to be done per glBegin/glEnd. |
| */ |
| void |
| _mesa_init_tracked_matrices(GLcontext *ctx) |
| { |
| GLuint i; |
| |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) { |
| /* point 'mat' at source matrix */ |
| GLmatrix *mat; |
| if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) { |
| mat = ctx->ModelviewMatrixStack.Top; |
| } |
| else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) { |
| mat = ctx->ProjectionMatrixStack.Top; |
| } |
| else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) { |
| mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top; |
| } |
| else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) { |
| mat = ctx->ColorMatrixStack.Top; |
| } |
| else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) { |
| /* XXX verify the combined matrix is up to date */ |
| mat = &ctx->_ModelProjectMatrix; |
| } |
| else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV && |
| ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) { |
| GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV; |
| ASSERT(n < MAX_PROGRAM_MATRICES); |
| mat = ctx->ProgramMatrixStack[n].Top; |
| } |
| else { |
| /* no matrix is tracked, but we leave the register values as-is */ |
| assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE); |
| continue; |
| } |
| |
| /* load the matrix */ |
| if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) { |
| load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m); |
| } |
| else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) { |
| _math_matrix_analyse(mat); /* update the inverse */ |
| assert((mat->flags & MAT_DIRTY_INVERSE) == 0); |
| load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv); |
| } |
| else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) { |
| load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m); |
| } |
| else { |
| assert(ctx->VertexProgram.TrackMatrixTransform[i] |
| == GL_INVERSE_TRANSPOSE_NV); |
| _math_matrix_analyse(mat); /* update the inverse */ |
| assert((mat->flags & MAT_DIRTY_INVERSE) == 0); |
| load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv); |
| } |
| } |
| } |
| |
| |
| |
| /** |
| * For debugging. Dump the current vertex program machine registers. |
| */ |
| void |
| _mesa_dump_vp_state( const struct vertex_program_state *state ) |
| { |
| int i; |
| _mesa_printf("VertexIn:\n"); |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) { |
| _mesa_printf("%d: %f %f %f %f ", i, |
| state->Inputs[i][0], |
| state->Inputs[i][1], |
| state->Inputs[i][2], |
| state->Inputs[i][3]); |
| } |
| _mesa_printf("\n"); |
| |
| _mesa_printf("VertexOut:\n"); |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) { |
| _mesa_printf("%d: %f %f %f %f ", i, |
| state->Outputs[i][0], |
| state->Outputs[i][1], |
| state->Outputs[i][2], |
| state->Outputs[i][3]); |
| } |
| _mesa_printf("\n"); |
| |
| _mesa_printf("Registers:\n"); |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) { |
| _mesa_printf("%d: %f %f %f %f ", i, |
| state->Temporaries[i][0], |
| state->Temporaries[i][1], |
| state->Temporaries[i][2], |
| state->Temporaries[i][3]); |
| } |
| _mesa_printf("\n"); |
| |
| _mesa_printf("Parameters:\n"); |
| for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) { |
| _mesa_printf("%d: %f %f %f %f ", i, |
| state->Parameters[i][0], |
| state->Parameters[i][1], |
| state->Parameters[i][2], |
| state->Parameters[i][3]); |
| } |
| _mesa_printf("\n"); |
| } |
| |
| |
| |
| /** |
| * Return a pointer to the 4-element float vector specified by the given |
| * source register. |
| */ |
| static INLINE const GLfloat * |
| get_register_pointer( const struct vp_src_register *source, |
| const struct vertex_program_state *state ) |
| { |
| if (source->RelAddr) { |
| const GLint reg = source->Index + state->AddressReg[0]; |
| ASSERT( (source->File == PROGRAM_ENV_PARAM) || |
| (source->File == PROGRAM_STATE_VAR) ); |
| if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS) |
| return zeroVec; |
| else |
| return state->Parameters[reg]; |
| } |
| else { |
| switch (source->File) { |
| case PROGRAM_TEMPORARY: |
| return state->Temporaries[source->Index]; |
| case PROGRAM_INPUT: |
| return state->Inputs[source->Index]; |
| case PROGRAM_LOCAL_PARAM: |
| /* XXX fix */ |
| return state->Temporaries[source->Index]; |
| case PROGRAM_ENV_PARAM: |
| return state->Parameters[source->Index]; |
| case PROGRAM_STATE_VAR: |
| return state->Parameters[source->Index]; |
| default: |
| _mesa_problem(NULL, |
| "Bad source register file in fetch_vector4(vp)"); |
| return NULL; |
| } |
| } |
| return NULL; |
| } |
| |
| |
| /** |
| * Fetch a 4-element float vector from the given source register. |
| * Apply swizzling and negating as needed. |
| */ |
| static INLINE void |
| fetch_vector4( const struct vp_src_register *source, |
| const struct vertex_program_state *state, |
| GLfloat result[4] ) |
| { |
| const GLfloat *src = get_register_pointer(source, state); |
| |
| if (source->Negate) { |
| result[0] = -src[source->Swizzle[0]]; |
| result[1] = -src[source->Swizzle[1]]; |
| result[2] = -src[source->Swizzle[2]]; |
| result[3] = -src[source->Swizzle[3]]; |
| } |
| else { |
| result[0] = src[source->Swizzle[0]]; |
| result[1] = src[source->Swizzle[1]]; |
| result[2] = src[source->Swizzle[2]]; |
| result[3] = src[source->Swizzle[3]]; |
| } |
| } |
| |
| |
| |
| /** |
| * As above, but only return result[0] element. |
| */ |
| static INLINE void |
| fetch_vector1( const struct vp_src_register *source, |
| const struct vertex_program_state *state, |
| GLfloat result[4] ) |
| { |
| const GLfloat *src = get_register_pointer(source, state); |
| |
| if (source->Negate) { |
| result[0] = -src[source->Swizzle[0]]; |
| } |
| else { |
| result[0] = src[source->Swizzle[0]]; |
| } |
| } |
| |
| |
| /** |
| * Store 4 floats into a register. |
| */ |
| static void |
| store_vector4( const struct vp_dst_register *dest, |
| struct vertex_program_state *state, |
| const GLfloat value[4] ) |
| { |
| GLfloat *dst; |
| switch (dest->File) { |
| case PROGRAM_TEMPORARY: |
| dst = state->Temporaries[dest->Index]; |
| break; |
| case PROGRAM_OUTPUT: |
| dst = state->Outputs[dest->Index]; |
| break; |
| case PROGRAM_ENV_PARAM: |
| { |
| /* a slight hack */ |
| GET_CURRENT_CONTEXT(ctx); |
| dst = ctx->VertexProgram.Parameters[dest->Index]; |
| } |
| break; |
| default: |
| _mesa_problem(NULL, "Invalid register file in store_vector4(file=%d)", |
| dest->File); |
| return; |
| } |
| |
| if (dest->WriteMask[0]) |
| dst[0] = value[0]; |
| if (dest->WriteMask[1]) |
| dst[1] = value[1]; |
| if (dest->WriteMask[2]) |
| dst[2] = value[2]; |
| if (dest->WriteMask[3]) |
| dst[3] = value[3]; |
| } |
| |
| |
/**
 * Set the float lvalue x to positive or negative infinity.
 *
 * - With USE_IEEE or on _WIN32 the single-precision bit pattern for
 *   +/-infinity is written directly into x.
 * - On VMS, +/-__MAXFLOAT is stored instead.
 * - Otherwise +/-HUGE_VAL converted to GLfloat is used.
 *
 * Every expansion is a single parenthesized assignment expression and
 * the argument is parenthesized, so the macros can be used safely inside
 * larger expressions (e.g. as operands of ?:) and with arguments such as
 * *p or a[i].
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the float lvalue x with the integer bit pattern 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
| |
| |
| /** |
| * Execute the given vertex program |
| */ |
| void |
| _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program) |
| { |
| struct vertex_program_state *state = &ctx->VertexProgram; |
| const struct vp_instruction *inst; |
| |
| ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */ |
| |
| /* If the program is position invariant, multiply the input |
| * position and the MVP matrix and stick it into the output pos slot |
| */ |
| if (ctx->VertexProgram.Current->IsPositionInvariant) { |
| TRANSFORM_POINT( ctx->VertexProgram.Outputs[0], |
| ctx->_ModelProjectMatrix.m, |
| ctx->VertexProgram.Inputs[0]); |
| |
| /* XXX: This could go elsewhere */ |
| ctx->VertexProgram.Current->OutputsWritten |= 0x1; |
| } |
| |
| for (inst = program->Instructions; /*inst->Opcode != VP_OPCODE_END*/; inst++) { |
| |
| if (ctx->VertexProgram.CallbackEnabled && |
| ctx->VertexProgram.Callback) { |
| ctx->VertexProgram.CurrentPosition = inst->StringPos; |
| ctx->VertexProgram.Callback(program->Base.Target, |
| ctx->VertexProgram.CallbackData); |
| } |
| |
| switch (inst->Opcode) { |
| case VP_OPCODE_MOV: |
| { |
| GLfloat t[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_LIT: |
| { |
| const GLfloat epsilon = 1.0e-5F; /* XXX fix? */ |
| GLfloat t[4], lit[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| if (t[3] < -(128.0F - epsilon)) |
| t[3] = - (128.0F - epsilon); |
| else if (t[3] > 128.0F - epsilon) |
| t[3] = 128.0F - epsilon; |
| if (t[0] < 0.0) |
| t[0] = 0.0; |
| if (t[1] < 0.0) |
| t[1] = 0.0; |
| lit[0] = 1.0; |
| lit[1] = t[0]; |
| lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F; |
| lit[3] = 1.0; |
| store_vector4( &inst->DstReg, state, lit ); |
| } |
| break; |
| case VP_OPCODE_RCP: |
| { |
| GLfloat t[4]; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| if (t[0] != 1.0F) |
| t[0] = 1.0F / t[0]; /* div by zero is infinity! */ |
| t[1] = t[2] = t[3] = t[0]; |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_RSQ: |
| { |
| GLfloat t[4]; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| t[0] = INV_SQRTF(FABSF(t[0])); |
| t[1] = t[2] = t[3] = t[0]; |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_EXP: |
| { |
| GLfloat t[4], q[4], floor_t0; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| floor_t0 = (float) floor(t[0]); |
| if (floor_t0 > FLT_MAX_EXP) { |
| SET_POS_INFINITY(q[0]); |
| SET_POS_INFINITY(q[2]); |
| } |
| else if (floor_t0 < FLT_MIN_EXP) { |
| q[0] = 0.0F; |
| q[2] = 0.0F; |
| } |
| else { |
| #ifdef USE_IEEE |
| GLint ii = (GLint) floor_t0; |
| ii = (ii < 23) + 0x3f800000; |
| SET_FLOAT_BITS(q[0], ii); |
| q[0] = *((GLfloat *) &ii); |
| #else |
| q[0] = (GLfloat) pow(2.0, floor_t0); |
| #endif |
| q[2] = (GLfloat) (q[0] * LOG2(q[1])); |
| } |
| q[1] = t[0] - floor_t0; |
| q[3] = 1.0F; |
| store_vector4( &inst->DstReg, state, q ); |
| } |
| break; |
| case VP_OPCODE_LOG: |
| { |
| GLfloat t[4], q[4], abs_t0; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| abs_t0 = (GLfloat) fabs(t[0]); |
| if (abs_t0 != 0.0F) { |
| /* Since we really can't handle infinite values on VMS |
| * like other OSes we'll use __MAXFLOAT to represent |
| * infinity. This may need some tweaking. |
| */ |
| #ifdef VMS |
| if (abs_t0 == __MAXFLOAT) |
| #else |
| if (IS_INF_OR_NAN(abs_t0)) |
| #endif |
| { |
| SET_POS_INFINITY(q[0]); |
| q[1] = 1.0F; |
| SET_POS_INFINITY(q[2]); |
| } |
| else { |
| int exponent; |
| double mantissa = frexp(t[0], &exponent); |
| q[0] = (GLfloat) (exponent - 1); |
| q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ |
| q[2] = (GLfloat) (q[0] + LOG2(q[1])); |
| } |
| } |
| else { |
| SET_NEG_INFINITY(q[0]); |
| q[1] = 1.0F; |
| SET_NEG_INFINITY(q[2]); |
| } |
| q[3] = 1.0; |
| store_vector4( &inst->DstReg, state, q ); |
| } |
| break; |
| case VP_OPCODE_MUL: |
| { |
| GLfloat t[4], u[4], prod[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| prod[0] = t[0] * u[0]; |
| prod[1] = t[1] * u[1]; |
| prod[2] = t[2] * u[2]; |
| prod[3] = t[3] * u[3]; |
| store_vector4( &inst->DstReg, state, prod ); |
| } |
| break; |
| case VP_OPCODE_ADD: |
| { |
| GLfloat t[4], u[4], sum[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| sum[0] = t[0] + u[0]; |
| sum[1] = t[1] + u[1]; |
| sum[2] = t[2] + u[2]; |
| sum[3] = t[3] + u[3]; |
| store_vector4( &inst->DstReg, state, sum ); |
| } |
| break; |
| case VP_OPCODE_DP3: |
| { |
| GLfloat t[4], u[4], dot[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2]; |
| dot[1] = dot[2] = dot[3] = dot[0]; |
| store_vector4( &inst->DstReg, state, dot ); |
| } |
| break; |
| case VP_OPCODE_DP4: |
| { |
| GLfloat t[4], u[4], dot[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3]; |
| dot[1] = dot[2] = dot[3] = dot[0]; |
| store_vector4( &inst->DstReg, state, dot ); |
| } |
| break; |
| case VP_OPCODE_DST: |
| { |
| GLfloat t[4], u[4], dst[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| dst[0] = 1.0F; |
| dst[1] = t[1] * u[1]; |
| dst[2] = t[2]; |
| dst[3] = u[3]; |
| store_vector4( &inst->DstReg, state, dst ); |
| } |
| break; |
| case VP_OPCODE_MIN: |
| { |
| GLfloat t[4], u[4], min[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| min[0] = (t[0] < u[0]) ? t[0] : u[0]; |
| min[1] = (t[1] < u[1]) ? t[1] : u[1]; |
| min[2] = (t[2] < u[2]) ? t[2] : u[2]; |
| min[3] = (t[3] < u[3]) ? t[3] : u[3]; |
| store_vector4( &inst->DstReg, state, min ); |
| } |
| break; |
| case VP_OPCODE_MAX: |
| { |
| GLfloat t[4], u[4], max[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| max[0] = (t[0] > u[0]) ? t[0] : u[0]; |
| max[1] = (t[1] > u[1]) ? t[1] : u[1]; |
| max[2] = (t[2] > u[2]) ? t[2] : u[2]; |
| max[3] = (t[3] > u[3]) ? t[3] : u[3]; |
| store_vector4( &inst->DstReg, state, max ); |
| } |
| break; |
| case VP_OPCODE_SLT: |
| { |
| GLfloat t[4], u[4], slt[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F; |
| slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F; |
| slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F; |
| slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F; |
| store_vector4( &inst->DstReg, state, slt ); |
| } |
| break; |
| case VP_OPCODE_SGE: |
| { |
| GLfloat t[4], u[4], sge[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F; |
| sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F; |
| sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F; |
| sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F; |
| store_vector4( &inst->DstReg, state, sge ); |
| } |
| break; |
| case VP_OPCODE_MAD: |
| { |
| GLfloat t[4], u[4], v[4], sum[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| fetch_vector4( &inst->SrcReg[2], state, v ); |
| sum[0] = t[0] * u[0] + v[0]; |
| sum[1] = t[1] * u[1] + v[1]; |
| sum[2] = t[2] * u[2] + v[2]; |
| sum[3] = t[3] * u[3] + v[3]; |
| store_vector4( &inst->DstReg, state, sum ); |
| } |
| break; |
| case VP_OPCODE_ARL: |
| { |
| GLfloat t[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| state->AddressReg[0] = (GLint) floor(t[0]); |
| } |
| break; |
| case VP_OPCODE_DPH: |
| { |
| GLfloat t[4], u[4], dot[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3]; |
| dot[1] = dot[2] = dot[3] = dot[0]; |
| store_vector4( &inst->DstReg, state, dot ); |
| } |
| break; |
| case VP_OPCODE_RCC: |
| { |
| GLfloat t[4], u; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| if (t[0] == 1.0F) |
| u = 1.0F; |
| else |
| u = 1.0F / t[0]; |
| if (u > 0.0F) { |
| if (u > 1.884467e+019F) { |
| u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */ |
| } |
| else if (u < 5.42101e-020F) { |
| u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */ |
| } |
| } |
| else { |
| if (u < -1.884467e+019F) { |
| u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */ |
| } |
| else if (u > -5.42101e-020F) { |
| u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */ |
| } |
| } |
| t[0] = t[1] = t[2] = t[3] = u; |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */ |
| { |
| GLfloat t[4], u[4], sum[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| sum[0] = t[0] - u[0]; |
| sum[1] = t[1] - u[1]; |
| sum[2] = t[2] - u[2]; |
| sum[3] = t[3] - u[3]; |
| store_vector4( &inst->DstReg, state, sum ); |
| } |
| break; |
| case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */ |
| { |
| GLfloat t[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| if (t[0] < 0.0) t[0] = -t[0]; |
| if (t[1] < 0.0) t[1] = -t[1]; |
| if (t[2] < 0.0) t[2] = -t[2]; |
| if (t[3] < 0.0) t[3] = -t[3]; |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_FLR: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| t[0] = FLOORF(t[0]); |
| t[1] = FLOORF(t[1]); |
| t[2] = FLOORF(t[2]); |
| t[3] = FLOORF(t[3]); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_FRC: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| t[0] = t[0] - FLOORF(t[0]); |
| t[1] = t[1] - FLOORF(t[1]); |
| t[2] = t[2] - FLOORF(t[2]); |
| t[3] = t[3] - FLOORF(t[3]); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_EX2: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4]; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_LG2: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4]; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| t[0] = t[1] = t[2] = t[3] = LOG2(t[0]); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_POW: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4], u[4]; |
| fetch_vector1( &inst->SrcReg[0], state, t ); |
| fetch_vector1( &inst->SrcReg[1], state, u ); |
| t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]); |
| store_vector4( &inst->DstReg, state, t ); |
| } |
| break; |
| case VP_OPCODE_XPD: /* GL_ARB_vertex_program */ |
| { |
| GLfloat t[4], u[4], cross[4]; |
| fetch_vector4( &inst->SrcReg[0], state, t ); |
| fetch_vector4( &inst->SrcReg[1], state, u ); |
| cross[0] = t[1] * u[2] - t[2] * u[1]; |
| cross[1] = t[2] * u[0] - t[0] * u[2]; |
| cross[2] = t[0] * u[1] - t[1] * u[0]; |
| store_vector4( &inst->DstReg, state, cross ); |
| } |
| break; |
| case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */ |
| { |
| const struct vp_src_register *source = &inst->SrcReg[0]; |
| const GLfloat *src = get_register_pointer(source, state); |
| GLfloat result[4]; |
| GLuint i; |
| |
| /* do extended swizzling here */ |
| for (i = 0; i < 3; i++) { |
| if (source->Swizzle[i] == SWIZZLE_ZERO) |
| result[i] = 0.0; |
| else if (source->Swizzle[i] == SWIZZLE_ONE) |
| result[i] = -1.0; |
| else |
| result[i] = -src[source->Swizzle[i]]; |
| if (source->Negate) |
| result[i] = -result[i]; |
| } |
| store_vector4( &inst->DstReg, state, result ); |
| } |
| break; |
| |
| case VP_OPCODE_END: |
| ctx->_CurrentProgram = 0; |
| return; |
| default: |
| /* bad instruction opcode */ |
| _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program"); |
| ctx->_CurrentProgram = 0; |
| return; |
| } /* switch */ |
| } /* for */ |
| |
| ctx->_CurrentProgram = 0; |
| } |
| |
| |
| |
| /** |
| Thoughts on vertex program optimization: |
| |
| The obvious thing to do is to compile the vertex program into X86/SSE/3DNow! |
| assembly code. That will probably be a lot of work. |
| |
| Another approach might be to replace the vp_instruction->Opcode field with |
| a pointer to a specialized C function which executes the instruction. |
| In particular we can write functions which skip swizzling, negating, |
| masking, relative addressing, etc. when they're not needed. |
| |
| For example: |
| |
| void simple_add( struct vp_instruction *inst ) |
| { |
| GLfloat *sum = machine->Registers[inst->DstReg.Register]; |
| GLfloat *a = machine->Registers[inst->SrcReg[0].Register]; |
| GLfloat *b = machine->Registers[inst->SrcReg[1].Register]; |
| sum[0] = a[0] + b[0]; |
| sum[1] = a[1] + b[1]; |
| sum[2] = a[2] + b[2]; |
| sum[3] = a[3] + b[3]; |
| } |
| |
| */ |
| |
| /* |
| |
| KW: |
| |
| A first step would be to 'vectorize' the programs in the same way as |
| the normal transformation code in the tnl module. Thus each opcode |
| takes zero or more input vectors (registers) and produces one or more |
| output vectors. |
| |
| These operations would initially be coded in C, with machine-specific |
| assembly following, as is currently the case for matrix |
| transformations in the math/ directory. The preprocessing scheme for |
| selecting simpler operations Brian describes above would also work |
| here. |
| |
| This should give reasonable performance without excessive effort. |
| |
| */ |