blob: c66db502a1704f98aa00127c85a29f1154a0737b [file] [log] [blame]
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Authors:
* Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
*/
#ifndef _R700_ASSEMBLER_H_
#define _R700_ASSEMBLER_H_
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
#include "r700_chip.h"
#include "r700_shaderinst.h"
#include "r700_shader.h"
/*
 * Kind of shader being assembled.  Stored in
 * r700_AssemblerBase::currentShaderType and passed to
 * Init_r700_AssemblerBase().
 */
typedef enum SHADER_PIPE_TYPE
{
    SPT_VP = 0, /* NOTE(review): presumably "vertex program" - confirm */
    SPT_FP = 1  /* NOTE(review): presumably "fragment program" - confirm */
} SHADER_PIPE_TYPE;
/*
 * Array dimensions for the hw_gpr / hw_cfile_addr / hw_cfile_chan tables
 * held in r700_AssemblerBase.
 */
typedef enum ConstantCycles
{
    NUMBER_OF_CYCLES     = 3, /* first dimension of hw_gpr[][] */
    NUMBER_OF_COMPONENTS = 4  /* second dimension of hw_gpr[][]; length of hw_cfile_addr[] and hw_cfile_chan[] */
} ConstantCycles;
/*
 * Register-file offsets and sizes (aliases of SQ_ALU_SRC_* constants that are
 * defined outside this header) together with fixed counts that size arrays
 * in r700_AssemblerBase.
 */
typedef enum HARDWARE_LIMIT_VALUES
{
    TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
    MAX_TEMPORARY_REGISTERS   = SQ_ALU_SRC_GPR_SIZE,
    MAX_CONSTANT_REGISTERS    = SQ_ALU_SRC_CFILE_SIZE,
    CFILE_REGISTER_OFFSET     = SQ_ALU_SRC_CFILE_BASE,

    NUMBER_OF_INPUT_COLORS    = 2,  /* length of input_color_is_used[] */
    NUMBER_OF_OUTPUT_COLORS   = 8,  /* length of color_export_register_number[] */
    NUMBER_OF_TEXTURE_UNITS   = 16, /* length of input_texture_unit_is_used[] */
    MEGA_FETCH_BYTES          = 32
} HARDWARE_LIMIT_VALUES;
/*
 * Operand addressing modes.
 * NOTE(review): presumably the value split across the addrmode0/addrmode1
 * bits of PVSDST / PVSSRC - confirm against the addrmode_* helpers.
 */
typedef enum AddressMode
{
    ADDR_ABSOLUTE       = 0,
    ADDR_RELATIVE_A0    = 1,
    ADDR_RELATIVE_FLI_0 = 2,
    NUMBER_OF_ADDR_MOD  = 3  /* count of the modes above */
} AddressMode;
/*
 * Register classes a source operand can name.
 * NOTE(review): presumably stored in PVSSRC::rtype - confirm.
 */
typedef enum SrcRegisterType
{
    SRC_REG_TEMPORARY      = 0,
    SRC_REG_INPUT          = 1,
    SRC_REG_CONSTANT       = 2,
    SRC_REG_ALT_TEMPORARY  = 3,
    NUMBER_OF_SRC_REG_TYPE = 4  /* count of the classes above */
} SrcRegisterType;
/*
 * Register classes a destination operand can name.
 * NOTE(review): presumably stored in PVSDST::rtype (3 bits wide, enough for
 * the values below) - confirm.
 */
typedef enum DstRegisterType
{
    DST_REG_TEMPORARY      = 0,
    DST_REG_A0             = 1,
    DST_REG_OUT            = 2,
    DST_REG_OUT_X_REPL     = 3,
    DST_REG_ALT_TEMPORARY  = 4,
    DST_REG_INPUT          = 5,
    NUMBER_OF_DST_REG_TYPE = 6  /* count of the classes above */
} DstRegisterType;
/* Base type of every bit-field declared in this header. */
typedef unsigned int BITS;

/*
 * Packed descriptor holding an opcode, the destination register type and
 * index, per-component write enables and addressing-mode bits.
 *
 * The field widths add up to 32 bits; the number in each trailing comment is
 * the running total of bits after that field.  Physical bit placement is
 * decided by the compiler/ABI.
 */
typedef struct PVSDSTtag
{
    BITS opcode     : 8;  /*  8 - @@@ really should be 10 bits for OP2 */
    BITS math       : 1;  /*  9 */
    BITS predicated : 1;  /* 10 */
    BITS pred_inv   : 1;  /* 11 */
    BITS rtype      : 3;  /* 14 */
    BITS reg        : 10; /* 24 */
    BITS writex     : 1;  /* 25 */
    BITS writey     : 1;  /* 26 */
    BITS writez     : 1;  /* 27 */
    BITS writew     : 1;  /* 28 */
    BITS op3        : 1;  /* 29 - represents *_OP3_* ALU opcode */
    BITS dualop     : 1;  /* 30 */
    BITS addrmode0  : 1;  /* 31 */
    BITS addrmode1  : 1;  /* 32 */
} PVSDST;
/*
 * Packed descriptor holding a source register type and index, a 3-bit
 * swizzle selector and a negate flag for each of the four components, and
 * two addressing-mode bits.
 *
 * The field widths add up to 32 bits; the number in each trailing comment is
 * the running total of bits after that field.
 */
typedef struct PVSSRCtag
{
    BITS rtype     : 4;  /*  4 */
    BITS addrmode0 : 1;  /*  5 */
    BITS reg       : 10; /* 15 */
    BITS swizzlex  : 3;  /* 18 */
    BITS swizzley  : 3;  /* 21 */
    BITS swizzlez  : 3;  /* 24 */
    BITS swizzlew  : 3;  /* 27 */
    BITS negx      : 1;  /* 28 */
    BITS negy      : 1;  /* 29 */
    BITS negz      : 1;  /* 30 */
    BITS negw      : 1;  /* 31 */
    BITS addrmode1 : 1;  /* 32 */
} PVSSRC;
/*
 * Third bit-field view of a PVSDWORD (see the PVSDWORD union): register type
 * and index, two swizzle selectors, a destination offset/component selector,
 * a 4-bit opcode and two negate flags.  The field widths add up to 32 bits.
 */
typedef struct PVSMATHtag
{
    BITS rtype    : 4;
    BITS spare    : 1;
    BITS reg      : 8;
    BITS swizzlex : 3;
    BITS swizzley : 3;
    BITS dstoff   : 2; /* 2 bits of dest offset into alt ram */
    BITS opcode   : 4;
    BITS negx     : 1;
    BITS negy     : 1;
    BITS dstcomp  : 2; /* select dest component */
    BITS spare2   : 3;
} PVSMATH;
/*
 * One word viewed five ways: as a raw unsigned value, as any of the three
 * bit-field descriptors declared in this header, or as a float.
 * r700_AssemblerBase keeps one of these in D and three in S[].
 */
typedef union PVSDWORDtag
{
    BITS    bits; /* raw value */
    PVSDST  dst;
    PVSSRC  src;
    PVSMATH math;
    float   f;
} PVSDWORD;
/*
 * One flag bit per vertex output plus 12 reserved bits (32 bits in total).
 * The number in each trailing comment is the count of bits declared before
 * that field.
 */
typedef struct VAP_OUT_VTX_FMT_0tag
{
    BITS pos            : 1;  /*  0 */
    BITS misc           : 1;  /*  1 */
    BITS clip_dist0     : 1;  /*  2 */
    BITS clip_dist1     : 1;  /*  3 */
    BITS pos_param      : 1;  /*  4 */
    BITS color0         : 1;  /*  5 */
    BITS color1         : 1;  /*  6 */
    BITS color2         : 1;  /*  7 */
    BITS color3         : 1;  /*  8 */
    BITS color4         : 1;  /*  9 */
    BITS color5         : 1;  /* 10 */
    BITS color6         : 1;  /* 11 */
    BITS color7         : 1;  /* 12 */
    BITS normal         : 1;  /* 13 */
    BITS depth          : 1;  /* 14 */
    BITS point_size     : 1;  /* 15 */
    BITS edge_flag      : 1;  /* 16 */
    BITS rta_index      : 1;  /* 17 - shares same channel as kill_flag */
    BITS kill_flag      : 1;  /* 18 */
    BITS viewport_index : 1;  /* 19 */
    BITS resvd1         : 12; /* 20 */
} VAP_OUT_VTX_FMT_0;
/*
 * A 3-bit "comp" field for each of tex0..tex7, followed by 8 reserved bits
 * (32 bits in total).
 */
typedef struct VAP_OUT_VTX_FMT_1tag
{
    BITS tex0comp : 3;
    BITS tex1comp : 3;
    BITS tex2comp : 3;
    BITS tex3comp : 3;
    BITS tex4comp : 3;
    BITS tex5comp : 3;
    BITS tex6comp : 3;
    BITS tex7comp : 3;
    BITS resvd    : 8;
} VAP_OUT_VTX_FMT_1;
/*
 * A 3-bit "comp" field for each of tex8..tex15, followed by 8 reserved bits
 * (32 bits in total).
 */
typedef struct VAP_OUT_VTX_FMT_2tag
{
    BITS tex8comp  : 3;
    BITS tex9comp  : 3;
    BITS tex10comp : 3;
    BITS tex11comp : 3;
    BITS tex12comp : 3;
    BITS tex13comp : 3;
    BITS tex14comp : 3;
    BITS tex15comp : 3;
    BITS resvd     : 8;
} VAP_OUT_VTX_FMT_2;
/*
 * One flag bit per fragment output (eight colors, depth, stencil reference,
 * coverage-to-mask, mask) plus 20 reserved bits (32 bits in total).
 * r700_AssemblerBase stores one instance in fp_stOutFmt0.
 */
typedef struct OUT_FRAGMENT_FMT_0tag
{
    BITS color0           : 1;
    BITS color1           : 1;
    BITS color2           : 1;
    BITS color3           : 1;
    BITS color4           : 1;
    BITS color5           : 1;
    BITS color6           : 1;
    BITS color7           : 1;
    BITS depth            : 1;
    BITS stencil_ref      : 1;
    BITS coverage_to_mask : 1;
    BITS mask             : 1;
    BITS resvd1           : 20;
} OUT_FRAGMENT_FMT_0;
/*
 * Control-flow clause categories.  Used for
 * r700_AssemblerBase::cf_current_clause_type and as the argument of
 * check_current_clause().
 */
typedef enum CF_CLAUSE_TYPE
{
    CF_EXPORT_CLAUSE       = 0,
    CF_ALU_CLAUSE          = 1,
    CF_TEX_CLAUSE          = 2,
    CF_VTX_CLAUSE          = 3,
    CF_OTHER_CLAUSE        = 4,
    CF_EMPTY_CLAUSE        = 5,
    NUMBER_CF_CLAUSE_TYPES = 6  /* count of the categories above */
} CF_CLAUSE_TYPE;
/*
 * Unnamed bag of integer constants: constant-pool limits, flow-control level
 * kinds, condition kinds and scheduling safe distances.
 */
enum
{
    /* constant-pool limits */
    MAX_BOOL_CONSTANTS  = 32,
    MAX_INT_CONSTANTS   = 32,
    MAX_FLOAT_CONSTANTS = 256,

    /* NOTE(review): presumably the values of FC_LEVEL::type - confirm */
    FC_NONE = 0,
    FC_IF   = 1,
    FC_LOOP = 2,
    FC_REP  = 3,

    /* NOTE(review): presumably the values of FC_LEVEL::cond - confirm */
    COND_NONE = 0,
    COND_BOOL = 1,
    COND_PRED = 2,
    COND_ALU  = 3,

    SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
    SAFEDIST_ALU = 6  ///< the same for alu->fc
};
/*
 * One flow-control nesting level; r700_AssemblerBase keeps an array of these
 * in fc_stack[].
 */
typedef struct FC_LEVEL
{
    unsigned int  first;  ///< first fc instruction on level (if, rep, loop)
    unsigned int *mid;    ///< middle instructions - else or all breaks on this level
    unsigned int  midLen; ///< NOTE(review): presumably the number of entries in mid - confirm
    unsigned int  type;   ///< NOTE(review): presumably one of FC_NONE/FC_IF/FC_LOOP/FC_REP - confirm
    unsigned int  cond;   ///< NOTE(review): presumably one of COND_NONE/COND_BOOL/COND_PRED/COND_ALU - confirm
    unsigned int  inv;
    unsigned int  bpush;  ///< 1 if first instruction does branch stack push
    int           id;     ///< id of bool or int variable
} FC_LEVEL;
/*
 * Fetch-method state handed by pointer to assemble_vfetch_instruction() and
 * assemble_vfetch_instruction2().
 */
typedef struct VTX_FETCH_METHOD
{
    GLboolean bEnableMini;
    GLuint    mega_fetch_remainder;
} VTX_FETCH_METHOD;
/*
 * Working state of the r700 shader assembler.  Nearly every function
 * declared in this header receives a pointer to one of these.
 */
typedef struct r700_AssemblerBase
{
    /* Pointers into the control-flow clauses, one per clause kind. */
    R700ControlFlowSXClause      *cf_last_export_ptr;
    R700ControlFlowSXClause      *cf_current_export_clause_ptr;
    R700ControlFlowALUClause     *cf_current_alu_clause_ptr;
    R700ControlFlowGenericClause *cf_current_tex_clause_ptr;
    R700ControlFlowGenericClause *cf_current_vtx_clause_ptr;
    R700ControlFlowGenericClause *cf_current_cf_clause_ptr;

    /* Result shader */
    R700_Shader *pR700Shader;

    /*
     * Type of the current clause.
     * NOTE(review): the original comment here read "No clause has been
     * created yet"; presumably that describes the initial state - confirm.
     */
    CF_CLAUSE_TYPE cf_current_clause_type;

    GLuint number_of_exports;
    GLuint number_of_colorandz_exports;
    GLuint number_of_export_opcodes;

    /* One destination-style word and three source-style words. */
    PVSDWORD D;
    PVSDWORD S[3];

    unsigned int uLastPosUpdate;

    OUT_FRAGMENT_FMT_0 fp_stOutFmt0;

    unsigned int uIIns;
    unsigned int uOIns;
    unsigned int number_used_registers;
    unsigned int uUsedConsts;

    /* Fragment programs */
    unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
    unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];

    unsigned int uBoolConsts;
    unsigned int uIntConsts;
    unsigned int uInsts;
    unsigned int uConsts;

    /* Vertex programs */
    unsigned char  ucVP_AttributeMap[VERT_ATTRIB_MAX];
    unsigned char  ucVP_OutputMap[VERT_RESULT_MAX];
    unsigned char *pucOutMask;

    /*
     * Flow control members
     */
    unsigned int FCSP;
    FC_LEVEL     fc_stack[32];
    unsigned int branch_depth;
    unsigned int max_branch_depth;

    /*
     * ArgSubst used in Assemble_Source() function
     */
    int aArgSubst[4];

    GLint hw_gpr[NUMBER_OF_CYCLES][NUMBER_OF_COMPONENTS];
    GLint hw_cfile_addr[NUMBER_OF_COMPONENTS];
    GLint hw_cfile_chan[NUMBER_OF_COMPONENTS];

    GLuint uOutputs;

    /* Export register numbers, one per output kind. */
    GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
    GLint depth_export_register_number;
    GLint stencil_export_register_number;
    GLint coverage_to_mask_export_register_number;
    GLint mask_export_register_number;

    GLuint starting_export_register_number;
    GLuint starting_vfetch_register_number;
    GLuint starting_temp_register_number;

    GLuint uHelpReg;
    GLuint uFirstHelpReg;

    /* Per-input "is used" flags. */
    GLboolean input_position_is_used;
    GLboolean input_normal_is_used;
    GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
    GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];

    R700VertexGenericFetch *vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];

    GLuint number_of_inputs;

    InstDeps *pInstDeps;

    SHADER_PIPE_TYPE currentShaderType;
    struct prog_instruction *pILInst;
    GLuint    uiCurInst;
    GLboolean bR6xx;

    /* helper to decide which type of instruction to assemble */
    GLboolean is_tex;
    /* we inserted helper instructions and need barrier on next TEX ins */
    GLboolean need_tex_barrier;
} r700_AssemblerBase;
/* Internal use */

/* Helpers taking a PVSDST descriptor. */
BITS addrmode_PVSDST(PVSDST *pPVSDST);
void setaddrmode_PVSDST(PVSDST *pPVSDST, BITS addrmode);
void nomask_PVSDST(PVSDST *pPVSDST);

/* Helpers taking a PVSSRC descriptor. */
BITS addrmode_PVSSRC(PVSSRC *pPVSSRC);
void setaddrmode_PVSSRC(PVSSRC *pPVSSRC, BITS addrmode);
void setswizzle_PVSSRC(PVSSRC *pPVSSRC, BITS swz);
void noswizzle_PVSSRC(PVSSRC *pPVSSRC);
void swizzleagain_PVSSRC(PVSSRC *pPVSSRC, BITS x, BITS y, BITS z, BITS w);
void neg_PVSSRC(PVSSRC *pPVSSRC);
void noneg_PVSSRC(PVSSRC *pPVSSRC);
void flipneg_PVSSRC(PVSSRC *pPVSSRC);
void zerocomp_PVSSRC(PVSSRC *pPVSSRC, int c);
void onecomp_PVSSRC(PVSSRC *pPVSSRC, int c);
/* Queries on output-format words, operand words and opcodes. */
BITS         is_misc_component_exported(VAP_OUT_VTX_FMT_0 *pOutVTXFmt0);
BITS         is_depth_component_exported(OUT_FRAGMENT_FMT_0 *pFPOutFmt);
GLboolean    is_reduction_opcode(PVSDWORD *dest);
GLuint       GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint *pClient_size);
unsigned int r700GetNumOperands(r700_AssemblerBase *pAsm);
GLboolean    IsTex(gl_inst_opcode Opcode);
GLboolean    IsAlu(gl_inst_opcode Opcode);
/* Clause selection and vertex-fetch / texture instruction insertion. */
int check_current_clause(r700_AssemblerBase *pAsm, CF_CLAUSE_TYPE new_clause_type);

GLboolean add_vfetch_instruction(r700_AssemblerBase *pAsm,
                                 R700VertexInstruction *vertex_instruction_ptr);

GLboolean add_tex_instruction(r700_AssemblerBase *pAsm,
                              R700TextureInstruction *tex_instruction_ptr);

GLboolean assemble_vfetch_instruction(r700_AssemblerBase *pAsm,
                                      GLuint gl_client_id,
                                      GLuint destination_register,
                                      GLuint number_of_elements,
                                      GLenum dataElementType,
                                      VTX_FETCH_METHOD *pFetchMethod);

GLboolean assemble_vfetch_instruction2(r700_AssemblerBase *pAsm,
                                       GLuint destination_register,
                                       GLenum type,
                                       GLint size,
                                       GLubyte element,
                                       GLuint _signed,
                                       GLboolean normalize,
                                       VTX_FETCH_METHOD *pFetchMethod);

GLboolean cleanup_vfetch_instructions(r700_AssemblerBase *pAsm);
/* Helper-register bookkeeping. */
GLuint gethelpr(r700_AssemblerBase *pAsm);
void   resethelpr(r700_AssemblerBase *pAsm);

/* Operand checks. */
void      checkop_init(r700_AssemblerBase *pAsm);
GLboolean mov_temp(r700_AssemblerBase *pAsm, int src);
GLboolean checkop1(r700_AssemblerBase *pAsm);
GLboolean checkop2(r700_AssemblerBase *pAsm);
GLboolean checkop3(r700_AssemblerBase *pAsm);

/* Source / destination operand assembly. */
GLboolean assemble_src(r700_AssemblerBase *pAsm, int src, int fld);
GLboolean assemble_dst(r700_AssemblerBase *pAsm);
GLboolean tex_dst(r700_AssemblerBase *pAsm);
GLboolean tex_src(r700_AssemblerBase *pAsm);
GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);

void initialize(r700_AssemblerBase *pAsm);
/* ALU instruction construction. */
GLboolean assemble_alu_src(R700ALUInstruction *alu_instruction_ptr,
                           int source_index,
                           PVSSRC *pSource,
                           BITS scalar_channel_index);

GLboolean add_alu_instruction(r700_AssemblerBase *pAsm,
                              R700ALUInstruction *alu_instruction_ptr,
                              GLuint contiguous_slots_needed);

/* Reads the properties of one source; results come back through the four BITS pointers. */
void get_src_properties(R700ALUInstruction *alu_instruction_ptr,
                        int source_index,
                        BITS *psrc_sel,
                        BITS *psrc_rel,
                        BITS *psrc_chan,
                        BITS *psrc_neg);

/* Classification of a source-select value. */
int is_cfile(BITS sel);
int is_const(BITS sel);
int is_gpr(BITS sel);

/* Constant-file / GPR reservation and bank-swizzle cycle lookup. */
GLboolean reserve_cfile(r700_AssemblerBase *pAsm, GLuint sel, GLuint chan);
GLboolean reserve_gpr(r700_AssemblerBase *pAsm, GLuint sel, GLuint chan, GLuint cycle);
GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint *pCycle);
GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint *pCycle);

GLboolean check_scalar(r700_AssemblerBase *pAsm, R700ALUInstruction *alu_instruction_ptr);
GLboolean check_vector(r700_AssemblerBase *pAsm, R700ALUInstruction *alu_instruction_ptr);
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
GLboolean next_ins(r700_AssemblerBase *pAsm);
GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
GLboolean assemble_BAD(char *opcode_str);
GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
GLboolean assemble_COS(r700_AssemblerBase *pAsm);
GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
GLboolean assemble_DST(r700_AssemblerBase *pAsm);
GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
GLboolean assemble_POW(r700_AssemblerBase *pAsm);
GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
GLboolean assemble_IF(r700_AssemblerBase *pAsm);
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
/* Export helpers. */
GLboolean Process_Export(r700_AssemblerBase *pAsm,
                         GLuint type,
                         GLuint export_starting_index,
                         GLuint export_count,
                         GLuint starting_register_number,
                         GLboolean is_depth_export);

GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
                                                 BITS depth_channel_select);
/* Interface */

/* Takes an instruction count, the instruction array and the assembler state. */
GLboolean AssembleInstr(GLuint uiNumberInsts,
                        struct prog_instruction *pILInst,
                        r700_AssemblerBase *pR700AsmCode);

GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
                                   GLbitfield OutputsWritten);
GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
                                 GLbitfield OutputsWritten);

int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt,
                            r700_AssemblerBase *pAsm,
                            R700_Shader *pShader);

GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
#endif //_R700_ASSEMBLER_H_