/* blob: ffc6068bd88cbbf12076ced3ed4272b7a9b01c0c [file] [log] [blame] */
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Authors:
* Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "main/imports.h"
#include "main/mtypes.h"
#include "tnl/t_context.h"
#include "shader/program.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "radeon_debug.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
#include "shader/programopt.c"
#include "r700_debug.h"
#include "r700_vertprog.h"
/*
 * Assign an export slot to every output the vertex program writes.
 *
 * Slots are numbered consecutively from unStart and recorded in
 * pAsm->ucVP_OutputMap, indexed by VERT_RESULT_*.  Outputs whose bit is not
 * set in mesa_vp->Base.OutputsWritten keep whatever map entry they had.
 *
 * pAsm     assembler state whose ucVP_OutputMap is filled in
 * mesa_vp  vertex program supplying the OutputsWritten mask
 * unStart  slot number given to the first written output
 *
 * Returns the number of outputs that received a slot.
 */
unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
                               struct gl_vertex_program *mesa_vp,
                               unsigned int unStart)
{
    //!!!!!!! THE ORDER MATCH FS INPUT
    const unsigned int aunNonTexOrder[] =
    {
        VERT_RESULT_HPOS,
        VERT_RESULT_COL0,
        VERT_RESULT_COL1,
        VERT_RESULT_BFC0, //TODO : dealing back face.
        VERT_RESULT_BFC1,
        VERT_RESULT_FOGC, //TODO : dealing fog.
        VERT_RESULT_PSIZ  //TODO : dealing point size.
    };
    const unsigned int unNumNonTex = sizeof(aunNonTexOrder) / sizeof(aunNonTexOrder[0]);
    unsigned int unNextSlot = unStart;
    unsigned int unResult;
    unsigned int unMask;
    unsigned int n;

    /* Non-texcoord outputs, in the order listed above. */
    for(n = 0; n < unNumNonTex; n++)
    {
        unResult = aunNonTexOrder[n];
        unMask   = 1 << unResult;
        if(mesa_vp->Base.OutputsWritten & unMask)
        {
            pAsm->ucVP_OutputMap[unResult] = unNextSlot++;
        }
    }

    /* Texture coordinate sets 0..7 come last. */
    for(n = 0; n < 8; n++)
    {
        unResult = VERT_RESULT_TEX0 + n;
        unMask   = 1 << unResult;
        if(mesa_vp->Base.OutputsWritten & unMask)
        {
            pAsm->ucVP_OutputMap[unResult] = unNextSlot++;
        }
    }

    return (unNextSlot - unStart);
}
/*
 * Assign a register to every vertex attribute the program reads.
 *
 * Attributes are visited in increasing index order; each one whose bit is
 * set in mesa_vp->Base.InputsRead gets the next consecutive number, starting
 * at unStart, stored in pAsm->ucVP_AttributeMap[attribute].  Entries for
 * attributes that are not read are left untouched.
 *
 * pAsm     assembler state whose ucVP_AttributeMap is filled in
 * mesa_vp  vertex program supplying the InputsRead mask
 * unStart  number given to the first attribute that is read
 *
 * Returns the number of attributes that were mapped.
 */
unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
                              struct gl_vertex_program *mesa_vp,
                              unsigned int unStart)
{
    int i;
    unsigned int unBit;
    unsigned int unTotal = unStart;

    for(i=0; i<VERT_ATTRIB_MAX; i++)
    {
        /* Unsigned literal: shifting a signed 1 into bit 31 is undefined,
         * which would happen for the last attribute if VERT_ATTRIB_MAX is 32
         * (value not visible here - TODO confirm). */
        unBit = 1u << i;
        if(mesa_vp->Base.InputsRead & unBit)
        {
            pAsm->ucVP_AttributeMap[i] = unTotal++;
        }
    }

    return (unTotal - unStart);
}
/*
 * Emit one vertex-fetch instruction per attribute read by the program.
 *
 * For every attribute whose bit is set in mesa_vp->Base.InputsRead,
 * assemble_vfetch_instruction() is called with the attribute index, its
 * mapped register (ucVP_AttributeMap) and the size/type from vp->aos_desc.
 *
 * NOTE(review): aos_desc is indexed here by attribute number, whereas
 * r700TranslateVertexShader fills it by active-stream index - confirm which
 * indexing is intended before relying on this variant.
 *
 * Always returns GL_TRUE.
 */
GLboolean Process_Vertex_Program_Vfetch_Instructions(
    struct r700_vertex_program *vp,
    struct gl_vertex_program   *mesa_vp)
{
    int i;
    unsigned int unBit;
    VTX_FETCH_METHOD vtxFetchMethod;

    vtxFetchMethod.bEnableMini          = GL_FALSE;
    vtxFetchMethod.mega_fetch_remainder = 0;

    for(i=0; i<VERT_ATTRIB_MAX; i++)
    {
        /* Unsigned literal: shifting a signed 1 into bit 31 is undefined,
         * which would happen for the last attribute if VERT_ATTRIB_MAX is 32
         * (value not visible here - TODO confirm). */
        unBit = 1u << i;
        if(mesa_vp->Base.InputsRead & unBit)
        {
            assemble_vfetch_instruction(&vp->r700AsmCode,
                                        i,
                                        vp->r700AsmCode.ucVP_AttributeMap[i],
                                        vp->aos_desc[i].size,
                                        vp->aos_desc[i].type,
                                        &vtxFetchMethod);
        }
    }

    return GL_TRUE;
}
/*
 * Emit one vertex-fetch instruction per active vertex stream.
 *
 * Walks context->stream_desc[0 .. nNumActiveAos-1] and hands each stream's
 * type, size, element, signedness and normalize flag to
 * assemble_vfetch_instruction2(), together with the register that the
 * stream's element was mapped to in ucVP_AttributeMap.
 *
 * mesa_vp is not used.  Always returns GL_TRUE.
 */
GLboolean Process_Vertex_Program_Vfetch_Instructions2(
    GLcontext *ctx,
    struct r700_vertex_program *vp,
    struct gl_vertex_program *mesa_vp)
{
    VTX_FETCH_METHOD fetchMethod;
    context_t *context = R700_CONTEXT(ctx);
    int nStream = 0;

    fetchMethod.mega_fetch_remainder = 0;
    fetchMethod.bEnableMini          = GL_FALSE;

    while(nStream < context->nNumActiveAos)
    {
        assemble_vfetch_instruction2(
            &vp->r700AsmCode,
            vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[nStream].element],
            context->stream_desc[nStream].type,
            context->stream_desc[nStream].size,
            context->stream_desc[nStream].element,
            context->stream_desc[nStream]._signed,
            context->stream_desc[nStream].normalize,
            &fetchMethod);
        nStream++;
    }

    return GL_TRUE;
}
/*
 * Lay out the register usage of a vertex program in vp->r700AsmCode.
 *
 * Registers are handed out in this order: R0 (vertex buffer index), one
 * register per input attribute, one per written output, then the program's
 * temporaries.  The first register after the temporaries is recorded in
 * uFirstHelpReg.  The vertex-fetch instructions are emitted along the way,
 * and pucOutMask is allocated with one zeroed byte per export.
 *
 * If vertex-fetch generation fails, an error is logged and the function
 * returns early, leaving outputs and temporaries unmapped.  If the output
 * mask cannot be allocated, an error is logged, pucOutMask is left NULL and
 * the remaining register accounting is still completed.
 */
void Map_Vertex_Program(GLcontext *ctx,
                        struct r700_vertex_program *vp,
                        struct gl_vertex_program *mesa_vp)
{
    GLuint ui;
    r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
    unsigned int num_inputs;

    // R0 will always be used for index into vertex buffer
    pAsm->number_used_registers = 1;
    pAsm->starting_vfetch_register_number = pAsm->number_used_registers;

    // Map Inputs: Add 1 to mapping since R0 is used for index
    num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
    pAsm->number_used_registers += num_inputs;

    // Create VFETCH instructions for inputs
    if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
    {
        radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
        return;
    }

    // Map Outputs
    pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
    pAsm->starting_export_register_number = pAsm->number_used_registers;
    pAsm->number_used_registers += pAsm->number_of_exports;

    pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
    if(NULL != pAsm->pucOutMask)
    {
        for(ui=0; ui<pAsm->number_of_exports; ui++)
        {
            pAsm->pucOutMask[ui] = 0x0;
        }
    }
    else if(0 != pAsm->number_of_exports)
    {
        /* A zero-byte request may legitimately yield NULL; anything else is
         * an allocation failure and must not be written through. */
        radeon_error("Failed to allocate vertex shader output mask. \n");
    }

    /* Map temporary registers (GPRs) */
    pAsm->starting_temp_register_number = pAsm->number_used_registers;

    if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
    {   /* arb uses NumNativeTemporaries */
        pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
    }
    else
    {   /* fix func t_vp uses NumTemporaries */
        pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
    }

    pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp)
{
GLuint i, j;
GLint * puiTEMPwrites;
struct prog_instruction *pILInst;
InstDeps *pInstDeps;
puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
{
puiTEMPwrites[i] = -1;
}
pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
for(i=0; i<mesa_vp->Base.NumInstructions; i++)
{
pInstDeps[i].nDstDep = -1;
pILInst = &(mesa_vp->Base.Instructions[i]);
//Dst
if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
{
//Set lastwrite for the temp
puiTEMPwrites[pILInst->DstReg.Index] = i;
}
//Src
for(j=0; j<3; j++)
{
if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
{
//Set dep.
pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
}
else
{
pInstDeps[i].nSrcDeps[j] = -1;
}
}
}
vp->r700AsmCode.pInstDeps = pInstDeps;
FREE(puiTEMPwrites);
return GL_TRUE;
}
/*
 * Translate a Mesa vertex program into a new r700 vertex program variant
 * for the currently active vertex streams.
 *
 * The Mesa program is cloned (with MVP code inserted when it is position
 * invariant), the size/stride/type of each active stream is recorded in
 * aos_desc, registers are mapped, and the instructions and exports are
 * assembled.
 *
 * Returns the new variant with translated set to GL_TRUE, or NULL on
 * failure.
 *
 * NOTE(review): the failure returns after allocation do not release vp or
 * the cloned program, and the result of _mesa_clone_program is not checked.
 */
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
                                                      struct gl_vertex_program *mesa_vp)
{
    context_t *context = R700_CONTEXT(ctx);
    struct r700_vertex_program *vp;
    unsigned int i;

    vp = _mesa_calloc(sizeof(*vp));
    if (NULL == vp)
    {
        /* Out of memory; callers already treat NULL as translation failure. */
        return NULL;
    }

    vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);

    if (mesa_vp->IsPositionInvariant)
    {
        _mesa_insert_mvp_code(ctx, vp->mesa_program);
    }

    /* Remember the stream layout this variant is built for. */
    for(i=0; i<context->nNumActiveAos; i++)
    {
        vp->aos_desc[i].size   = context->stream_desc[i].size;
        vp->aos_desc[i].stride = context->stream_desc[i].stride;
        vp->aos_desc[i].type   = context->stream_desc[i].type;
    }

    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        vp->r700AsmCode.bR6xx = 1;
    }

    //Init_Program
    Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
    Map_Vertex_Program(ctx, vp, vp->mesa_program );

    if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
    {
        return NULL;
    }

    if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
                                 &(vp->mesa_program->Base.Instructions[0]),
                                 &(vp->r700AsmCode)) )
    {
        return NULL;
    }

    if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
    {
        return NULL;
    }

    /* nRegs holds the register count minus one (0 when no register is used). */
    vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
                         : (vp->r700AsmCode.number_used_registers - 1);

    vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;

    vp->translated = GL_TRUE;

    return vp;
}
/*
 * Select the translated variant of the current vertex program that matches
 * the active vertex streams, translating a new one if none matches.
 *
 * A cached variant matches when the size and type of every active stream
 * equal those recorded in the variant's aos_desc.  A newly translated
 * variant is pushed onto the front of the program's variant list.  The
 * result is stored in context->selected_vp; if translation fails, an error
 * is logged and selected_vp is left unchanged.
 */
void r700SelectVertexShader(GLcontext *ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    struct r700_vertex_program_cont *vpc;
    struct r700_vertex_program *vp;
    unsigned int i;
    GLboolean match;

    vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;

    for (vp = vpc->progs; vp; vp = vp->next)
    {
        match = GL_TRUE;
        for(i=0; i<context->nNumActiveAos; i++)
        {
            /* The fetch instructions are generated from both the stream size
             * and the stream type, so a variant built for a different type
             * must not be reused even when the sizes agree. */
            if (vp->aos_desc[i].size != context->stream_desc[i].size ||
                vp->aos_desc[i].type != context->stream_desc[i].type)
            {
                match = GL_FALSE;
                break;
            }
        }
        if (match)
        {
            context->selected_vp = vp;
            return;
        }
    }

    vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
    if(!vp)
    {
        radeon_error("Failed to translate vertex shader. \n");
        return;
    }

    vp->next = vpc->progs;
    vpc->progs = vp;
    context->selected_vp = vp;
    return;
}
/*
 * Return the size in bytes of one component of the given GL data type.
 *
 * Handles GL_DOUBLE, GL_FLOAT, GL_INT, GL_UNSIGNED_INT, GL_SHORT,
 * GL_UNSIGNED_SHORT, GL_BYTE and GL_UNSIGNED_BYTE.  Any other value trips
 * an assertion and, when assertions are compiled out, yields 0.
 */
int getTypeSize(GLenum type)
{
    int nBytes = 0;

    switch (type)
    {
    case GL_DOUBLE:
        nBytes = sizeof(GLdouble);
        break;
    case GL_FLOAT:
        nBytes = sizeof(GLfloat);
        break;
    case GL_INT:
        nBytes = sizeof(GLint);
        break;
    case GL_UNSIGNED_INT:
        nBytes = sizeof(GLuint);
        break;
    case GL_SHORT:
        nBytes = sizeof(GLshort);
        break;
    case GL_UNSIGNED_SHORT:
        nBytes = sizeof(GLushort);
        break;
    case GL_BYTE:
        nBytes = sizeof(GLbyte);
        break;
    case GL_UNSIGNED_BYTE:
        nBytes = sizeof(GLubyte);
        break;
    default:
        /* Unsupported type. */
        assert(0);
        break;
    }

    return nBytes;
}
/*
 * Describe one vertex array as the next active stream.
 *
 * Fills context->stream_desc[context->nNumActiveAos] from the client array
 * and then increments nNumActiveAos.  Arrays of GL_DOUBLE, GL_INT or
 * GL_UNSIGNED_INT, arrays whose effective stride is below 4 bytes (and, on
 * big-endian builds, any type that is not 4 bytes wide) are described as
 * GL_FLOAT; all others keep their own type.
 *
 * ctx    GL context
 * unLoc  attribute index, stored as the stream's element
 * count  not used
 * input  client array being described
 *
 * Note: when the array keeps its own type and lives in a named buffer
 * object (BufferObj->Name != 0), stride and is_named_bo are not written
 * here and retain their previous values.
 */
static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
{
    context_t *context = R700_CONTEXT(ctx);
    StreamDesc * pDesc = &(context->stream_desc[context->nNumActiveAos]);
    GLuint unEffectiveStride;

    /* Tightly packed arrays (StrideB == 0) advance by one whole element. */
    if (input->StrideB == 0)
    {
        unEffectiveStride = getTypeSize(input->Type) * input->Size;
    }
    else
    {
        unEffectiveStride = input->StrideB;
    }

    if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
#if MESA_BIG_ENDIAN
        getTypeSize(input->Type) != 4 ||
#endif
        unEffectiveStride < 4)
    {
        /* Described as floats. */
        pDesc->type        = GL_FLOAT;
        pDesc->stride      = (input->StrideB == 0) ? 0 : sizeof(GLfloat) * input->Size;
        pDesc->dwords      = input->Size;
        pDesc->is_named_bo = GL_FALSE;
    }
    else
    {
        /* Keeps its own type; element size is rounded up to whole dwords. */
        pDesc->type   = input->Type;
        pDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;

        if (!input->BufferObj->Name)
        {
            pDesc->stride = (input->StrideB == 0)
                          ? 0
                          : ((getTypeSize(pDesc->type) * input->Size + 3) & ~3);
            pDesc->is_named_bo = GL_FALSE;
        }
    }

    pDesc->size    = input->Size;
    pDesc->dst_loc = context->nNumActiveAos;
    pDesc->element = unLoc;

    switch (pDesc->type)
    { //GetSurfaceFormat
    case GL_FLOAT:
        pDesc->_signed   = 0;
        pDesc->normalize = GL_FALSE;
        break;
    case GL_SHORT:
    case GL_BYTE:
        pDesc->_signed   = 1;
        pDesc->normalize = input->Normalized;
        break;
    case GL_UNSIGNED_SHORT:
    case GL_UNSIGNED_BYTE:
        pDesc->_signed   = 0;
        pDesc->normalize = input->Normalized;
        break;
    case GL_INT:
    case GL_UNSIGNED_INT:
    case GL_DOUBLE:
    default:
        /* Not expected as a stream type at this point. */
        assert(0);
        break;
    }

    context->nNumActiveAos++;
}
/*
 * Rebuild the list of active vertex streams for the current vertex program.
 *
 * Resets context->nNumActiveAos, then calls r700TranslateAttrib() once for
 * each attribute bit set in the program's InputsRead mask (the position
 * attribute is forced on for position-invariant programs), in increasing
 * attribute order.  Finally the stream count is copied to
 * context->radeon.tcl.aos_count.
 *
 * ctx     GL context
 * arrays  client arrays, indexed by attribute number
 * count   forwarded to r700TranslateAttrib
 */
void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
{
    context_t *context = R700_CONTEXT(ctx);
    /* The current program is the container object that embeds the Mesa
     * program, as in r700SelectVertexShader - not a translated variant,
     * whose mesa_program member is a pointer. */
    struct r700_vertex_program_cont *vpc
        = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
    struct gl_vertex_program * mesa_vp = &(vpc->mesa_program);
    unsigned int unLoc = 0;
    unsigned int unBit = mesa_vp->Base.InputsRead;

    context->nNumActiveAos = 0;

    if (mesa_vp->IsPositionInvariant)
    {
        unBit |= VERT_BIT_POS;
    }

    /* Consume the mask from the low bit up; unLoc tracks the bit position. */
    while(unBit)
    {
        if(unBit & 1)
        {
            r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
        }

        unBit >>= 1;
        ++unLoc;
    }

    context->radeon.tcl.aos_count = context->nNumActiveAos;
}
/*
 * Return the shader buffer object of the currently selected vertex program
 * variant, or NULL when no variant is selected.
 */
void * r700GetActiveVpShaderBo(GLcontext * ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    struct r700_vertex_program *pSelected = context->selected_vp;

    if (!pSelected)
    {
        return NULL;
    }

    return pSelected->shaderbo;
}
/*
 * Program the hardware state for the selected vertex program variant.
 *
 * On first use the shader is assembled (if needed) and emitted through
 * r600EmitShader().  The VS resource registers, the SPI export/interpolant
 * counts and the vertex shader constants are then set from the variant and
 * its parameter list.
 *
 * Returns GL_TRUE on success.  Returns GL_FALSE when no variant is selected
 * or when the program has more parameters than R700_MAX_DX9_CONSTS.
 */
GLboolean r700SetupVertexProgram(GLcontext * ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
    struct r700_vertex_program *vp = context->selected_vp;

    struct gl_program_parameter_list *paramList;
    unsigned int unNumParamData;
    unsigned int ui;

    /* r700SelectVertexShader leaves selected_vp unset when translation
     * fails, and r700GetActiveVpShaderBo already treats it as optional. */
    if(NULL == vp)
    {
        return GL_FALSE;
    }

    if(GL_FALSE == vp->loaded)
    {
        if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
        {
            Assemble( &(vp->r700Shader) );
        }

        /* Load vp to gpu */
        r600EmitShader(ctx,
                       &(vp->shaderbo),
                       (GLvoid *)(vp->r700Shader.pProgram),
                       vp->r700Shader.uShaderBinaryDWORDSize,
                       "VS");

        vp->loaded = GL_TRUE;
    }

    DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
                 vp->r700Shader.uShaderBinaryDWORDSize);

    /* TODO : enable this after MemUse fixed :
    (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
    */

    R600_STATECHANGE(context, vs);
    R600_STATECHANGE(context, fs); /* hack */

    r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
    SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);

    r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */

    /* nRegs is stored as "count - 1" by the translator, hence the + 1. */
    SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
             NUM_GPRS_shift, NUM_GPRS_mask);

    if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
    {
        SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
                 STACK_SIZE_shift, STACK_SIZE_mask);
    }

    R600_STATECHANGE(context, spi);

    /* VS_EXPORT_COUNT is written as "exports - 1", clamped at 0. */
    SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
             vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
             VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
    SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
             NUM_INTERP_shift, NUM_INTERP_mask);

    /*
    SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
    CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
    */

    /* sent out shader constants. */
    paramList = vp->mesa_program->Base.Parameters;

    if(NULL != paramList) {
        _mesa_load_state_parameters(ctx, paramList);

        if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
            return GL_FALSE;

        R600_STATECHANGE(context, vs_consts);

        r700->vs.num_consts = paramList->NumParameters;

        unNumParamData = paramList->NumParameters;

        /* Copy each four-component parameter into the constant table. */
        for(ui=0; ui<unNumParamData; ui++) {
            r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
            r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
            r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
            r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
        }
    } else
        r700->vs.num_consts = 0;

    return GL_TRUE;
}