| /* |
| * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
| * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /* |
| * Authors: |
| * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> |
| */ |
| |
| #ifndef _R700_ASSEMBLER_H_ |
| #define _R700_ASSEMBLER_H_ |
| |
| #include "main/mtypes.h" |
| #include "shader/prog_instruction.h" |
| |
| #include "r700_chip.h" |
| #include "r700_shaderinst.h" |
| #include "r700_shader.h" |
| |
| typedef enum SHADER_PIPE_TYPE |
| { |
| SPT_VP = 0, |
| SPT_FP = 1 |
| } SHADER_PIPE_TYPE; |
| |
| typedef enum ConstantCycles |
| { |
| NUMBER_OF_CYCLES = 3, |
| NUMBER_OF_COMPONENTS = 4 |
| } ConstantCycles; |
| |
| typedef enum HARDWARE_LIMIT_VALUES |
| { |
| TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE, |
| MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE, |
| MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE, |
| CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE, |
| NUMBER_OF_INPUT_COLORS = 2, |
| NUMBER_OF_OUTPUT_COLORS = 8, |
| NUMBER_OF_TEXTURE_UNITS = 16, |
| MEGA_FETCH_BYTES = 32 |
| } HARDWARE_LIMIT_VALUES; |
| |
| typedef enum AddressMode |
| { |
| ADDR_ABSOLUTE = 0, |
| ADDR_RELATIVE_A0 = 1, |
| ADDR_RELATIVE_FLI_0 = 2, |
| NUMBER_OF_ADDR_MOD = 3 |
| } AddressMode; |
| |
| typedef enum SrcRegisterType |
| { |
| SRC_REG_TEMPORARY = 0, |
| SRC_REG_INPUT = 1, |
| SRC_REG_CONSTANT = 2, |
| SRC_REG_ALT_TEMPORARY = 3, |
| NUMBER_OF_SRC_REG_TYPE = 4 |
| } SrcRegisterType; |
| |
| typedef enum DstRegisterType |
| { |
| DST_REG_TEMPORARY = 0, |
| DST_REG_A0 = 1, |
| DST_REG_OUT = 2, |
| DST_REG_OUT_X_REPL = 3, |
| DST_REG_ALT_TEMPORARY = 4, |
| DST_REG_INPUT = 5, |
| NUMBER_OF_DST_REG_TYPE = 6 |
| } DstRegisterType; |
| |
| typedef unsigned int BITS; |
| |
| typedef struct PVSDSTtag |
| { |
| BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2 |
| BITS math:1; |
| BITS predicated:1; //10 //8 |
| BITS pred_inv :1; //11 //8 |
| |
| BITS rtype:3; |
| BITS reg:10; //24 //20 |
| |
| BITS writex:1; |
| BITS writey:1; |
| BITS writez:1; |
| BITS writew:1; //28 |
| |
| BITS op3:1; // 29 Represents *_OP3_* ALU opcode |
| |
| BITS dualop:1; // 30 //26 |
| |
| BITS addrmode0:1; //31 //29 |
| BITS addrmode1:1; //32 |
| } PVSDST; |
| |
| typedef struct PVSSRCtag |
| { |
| BITS rtype:4; |
| BITS addrmode0:1; |
| BITS reg:10; //15 (8) |
| BITS swizzlex:3; |
| BITS swizzley:3; |
| BITS swizzlez:3; |
| BITS swizzlew:3; //27 |
| |
| BITS negx:1; |
| BITS negy:1; |
| BITS negz:1; |
| BITS negw:1; //31 |
| //BITS addrsel:2; |
| BITS addrmode1:1; //32 |
| } PVSSRC; |
| |
| typedef struct PVSMATHtag |
| { |
| BITS rtype:4; |
| BITS spare:1; |
| BITS reg:8; |
| BITS swizzlex:3; |
| BITS swizzley:3; |
| BITS dstoff:2; // 2 bits of dest offset into alt ram |
| BITS opcode:4; |
| BITS negx:1; |
| BITS negy:1; |
| BITS dstcomp:2; // select dest component |
| BITS spare2:3; |
| } PVSMATH; |
| |
| typedef union PVSDWORDtag |
| { |
| BITS bits; |
| PVSDST dst; |
| PVSSRC src; |
| PVSMATH math; |
| float f; |
| } PVSDWORD; |
| |
| typedef struct VAP_OUT_VTX_FMT_0tag |
| { |
| BITS pos:1; // 0 |
| BITS misc:1; |
| BITS clip_dist0:1; |
| BITS clip_dist1:1; |
| BITS pos_param:1; // 4 |
| |
| BITS color0:1; // 5 |
| BITS color1:1; |
| BITS color2:1; |
| BITS color3:1; |
| BITS color4:1; |
| BITS color5:1; |
| BITS color6:1; |
| BITS color7:1; |
| |
| BITS normal:1; |
| |
| BITS depth:1; // 14 |
| |
| BITS point_size:1; // 15 |
| BITS edge_flag:1; |
| BITS rta_index:1; // shares same channel as kill_flag |
| BITS kill_flag:1; |
| BITS viewport_index:1; // 19 |
| |
| BITS resvd1:12; // 20 |
| } VAP_OUT_VTX_FMT_0; |
| |
| typedef struct VAP_OUT_VTX_FMT_1tag |
| { |
| BITS tex0comp:3; |
| BITS tex1comp:3; |
| BITS tex2comp:3; |
| BITS tex3comp:3; |
| BITS tex4comp:3; |
| BITS tex5comp:3; |
| BITS tex6comp:3; |
| BITS tex7comp:3; |
| |
| BITS resvd:8; |
| } VAP_OUT_VTX_FMT_1; |
| |
| typedef struct VAP_OUT_VTX_FMT_2tag |
| { |
| BITS tex8comp :3; |
| BITS tex9comp :3; |
| BITS tex10comp:3; |
| BITS tex11comp:3; |
| BITS tex12comp:3; |
| BITS tex13comp:3; |
| BITS tex14comp:3; |
| BITS tex15comp:3; |
| |
| BITS resvd:8; |
| } VAP_OUT_VTX_FMT_2; |
| |
| typedef struct OUT_FRAGMENT_FMT_0tag |
| { |
| BITS color0:1; |
| BITS color1:1; |
| BITS color2:1; |
| BITS color3:1; |
| BITS color4:1; |
| BITS color5:1; |
| BITS color6:1; |
| BITS color7:1; |
| |
| BITS depth:1; |
| BITS stencil_ref:1; |
| BITS coverage_to_mask:1; |
| BITS mask:1; |
| |
| BITS resvd1:20; |
| } OUT_FRAGMENT_FMT_0; |
| |
| typedef enum CF_CLAUSE_TYPE |
| { |
| CF_EXPORT_CLAUSE, |
| CF_ALU_CLAUSE, |
| CF_TEX_CLAUSE, |
| CF_VTX_CLAUSE, |
| CF_OTHER_CLAUSE, |
| CF_EMPTY_CLAUSE, |
| NUMBER_CF_CLAUSE_TYPES |
| } CF_CLAUSE_TYPE; |
| |
| enum |
| { |
| MAX_BOOL_CONSTANTS = 32, |
| MAX_INT_CONSTANTS = 32, |
| MAX_FLOAT_CONSTANTS = 256, |
| |
| FC_NONE = 0, |
| FC_IF = 1, |
| FC_LOOP = 2, |
| FC_REP = 3, |
| |
| COND_NONE = 0, |
| COND_BOOL = 1, |
| COND_PRED = 2, |
| COND_ALU = 3, |
| |
| SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup |
| SAFEDIST_ALU = 6 ///< the same for alu->fc |
| }; |
| |
| typedef struct FC_LEVEL |
| { |
| unsigned int first; ///< first fc instruction on level (if, rep, loop) |
| unsigned int* mid; ///< middle instructions - else or all breaks on this level |
| unsigned int midLen; |
| unsigned int type; |
| unsigned int cond; |
| unsigned int inv; |
| unsigned int bpush; ///< 1 if first instruction does branch stack push |
| int id; ///< id of bool or int variable |
| } FC_LEVEL; |
| |
| typedef struct VTX_FETCH_METHOD |
| { |
| GLboolean bEnableMini; |
| GLuint mega_fetch_remainder; |
| } VTX_FETCH_METHOD; |
| |
| typedef struct r700_AssemblerBase |
| { |
| R700ControlFlowSXClause* cf_last_export_ptr; |
| R700ControlFlowSXClause* cf_current_export_clause_ptr; |
| R700ControlFlowALUClause* cf_current_alu_clause_ptr; |
| R700ControlFlowGenericClause* cf_current_tex_clause_ptr; |
| R700ControlFlowGenericClause* cf_current_vtx_clause_ptr; |
| R700ControlFlowGenericClause* cf_current_cf_clause_ptr; |
| |
| //Result shader |
| R700_Shader * pR700Shader; |
| |
| // No clause has been created yet |
| CF_CLAUSE_TYPE cf_current_clause_type; |
| |
| GLuint number_of_exports; |
| GLuint number_of_colorandz_exports; |
| GLuint number_of_export_opcodes; |
| |
| PVSDWORD D; |
| PVSDWORD S[3]; |
| |
| unsigned int uLastPosUpdate; |
| |
| OUT_FRAGMENT_FMT_0 fp_stOutFmt0; |
| |
| unsigned int uIIns; |
| unsigned int uOIns; |
| unsigned int number_used_registers; |
| unsigned int uUsedConsts; |
| |
| // Fragment programs |
| unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; |
| unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; |
| unsigned int uBoolConsts; |
| unsigned int uIntConsts; |
| unsigned int uInsts; |
| unsigned int uConsts; |
| |
| // Vertex programs |
| unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX]; |
| unsigned char ucVP_OutputMap[VERT_RESULT_MAX]; |
| |
| unsigned char * pucOutMask; |
| |
| //----------------------------------------------------------------------------------- |
| // flow control members |
| //----------------------------------------------------------------------------------- |
| unsigned int FCSP; |
| FC_LEVEL fc_stack[32]; |
| |
| unsigned int branch_depth; |
| unsigned int max_branch_depth; |
| |
| //----------------------------------------------------------------------------------- |
| // ArgSubst used in Assemble_Source() function |
| //----------------------------------------------------------------------------------- |
| int aArgSubst[4]; |
| |
| GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ]; |
| GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ]; |
| GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ]; |
| |
| GLuint uOutputs; |
| |
| GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS]; |
| GLint depth_export_register_number; |
| |
| GLint stencil_export_register_number; |
| GLint coverage_to_mask_export_register_number; |
| GLint mask_export_register_number; |
| |
| GLuint starting_export_register_number; |
| GLuint starting_vfetch_register_number; |
| GLuint starting_temp_register_number; |
| GLuint uHelpReg; |
| GLuint uFirstHelpReg; |
| |
| GLboolean input_position_is_used; |
| GLboolean input_normal_is_used; |
| |
| GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS]; |
| |
| GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS]; |
| |
| R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX]; |
| |
| GLuint number_of_inputs; |
| |
| InstDeps *pInstDeps; |
| |
| SHADER_PIPE_TYPE currentShaderType; |
| struct prog_instruction * pILInst; |
| GLuint uiCurInst; |
| GLboolean bR6xx; |
| /* helper to decide which type of instruction to assemble */ |
| GLboolean is_tex; |
| /* we inserted helper intructions and need barrier on next TEX ins */ |
| GLboolean need_tex_barrier; |
| } r700_AssemblerBase; |
| |
| //Internal use |
| BITS addrmode_PVSDST(PVSDST * pPVSDST); |
| void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); |
| void nomask_PVSDST(PVSDST * pPVSDST); |
| BITS addrmode_PVSSRC(PVSSRC* pPVSSRC); |
| void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode); |
| void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz); |
| void noswizzle_PVSSRC(PVSSRC* pPVSSRC); |
| void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w); |
| void neg_PVSSRC(PVSSRC* pPVSSRC); |
| void noneg_PVSSRC(PVSSRC* pPVSSRC); |
| void flipneg_PVSSRC(PVSSRC* pPVSSRC); |
| void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c); |
| void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c); |
| BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0); |
| BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; |
| GLboolean is_reduction_opcode(PVSDWORD * dest); |
| GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); |
| |
| unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); |
| |
| GLboolean IsTex(gl_inst_opcode Opcode); |
| GLboolean IsAlu(gl_inst_opcode Opcode); |
| int check_current_clause(r700_AssemblerBase* pAsm, |
| CF_CLAUSE_TYPE new_clause_type); |
| GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, |
| R700VertexInstruction* vertex_instruction_ptr); |
| GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, |
| R700TextureInstruction* tex_instruction_ptr); |
| GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, |
| GLuint gl_client_id, |
| GLuint destination_register, |
| GLuint number_of_elements, |
| GLenum dataElementType, |
| VTX_FETCH_METHOD* pFetchMethod); |
| GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, |
| GLuint destination_register, |
| GLenum type, |
| GLint size, |
| GLubyte element, |
| GLuint _signed, |
| GLboolean normalize, |
| VTX_FETCH_METHOD * pFetchMethod); |
| GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); |
| GLuint gethelpr(r700_AssemblerBase* pAsm); |
| void resethelpr(r700_AssemblerBase* pAsm); |
| void checkop_init(r700_AssemblerBase* pAsm); |
| GLboolean mov_temp(r700_AssemblerBase* pAsm, int src); |
| GLboolean checkop1(r700_AssemblerBase* pAsm); |
| GLboolean checkop2(r700_AssemblerBase* pAsm); |
| GLboolean checkop3(r700_AssemblerBase* pAsm); |
| GLboolean assemble_src(r700_AssemblerBase *pAsm, |
| int src, |
| int fld); |
| GLboolean assemble_dst(r700_AssemblerBase *pAsm); |
| GLboolean tex_dst(r700_AssemblerBase *pAsm); |
| GLboolean tex_src(r700_AssemblerBase *pAsm); |
| GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized); |
| void initialize(r700_AssemblerBase *pAsm); |
| GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, |
| int source_index, |
| PVSSRC* pSource, |
| BITS scalar_channel_index); |
| GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, |
| R700ALUInstruction* alu_instruction_ptr, |
| GLuint contiguous_slots_needed); |
| void get_src_properties(R700ALUInstruction* alu_instruction_ptr, |
| int source_index, |
| BITS* psrc_sel, |
| BITS* psrc_rel, |
| BITS* psrc_chan, |
| BITS* psrc_neg); |
| int is_cfile(BITS sel); |
| int is_const(BITS sel); |
| int is_gpr(BITS sel); |
| GLboolean reserve_cfile(r700_AssemblerBase* pAsm, |
| GLuint sel, |
| GLuint chan); |
| GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle); |
| GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); |
| GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); |
| GLboolean check_scalar(r700_AssemblerBase* pAsm, |
| R700ALUInstruction* alu_instruction_ptr); |
| GLboolean check_vector(r700_AssemblerBase* pAsm, |
| R700ALUInstruction* alu_instruction_ptr); |
| GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); |
| GLboolean next_ins(r700_AssemblerBase *pAsm); |
| GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); |
| GLboolean assemble_ABS(r700_AssemblerBase *pAsm); |
| GLboolean assemble_ADD(r700_AssemblerBase *pAsm); |
| GLboolean assemble_ARL(r700_AssemblerBase *pAsm); |
| GLboolean assemble_BAD(char *opcode_str); |
| GLboolean assemble_CMP(r700_AssemblerBase *pAsm); |
| GLboolean assemble_COS(r700_AssemblerBase *pAsm); |
| GLboolean assemble_DOT(r700_AssemblerBase *pAsm); |
| GLboolean assemble_DST(r700_AssemblerBase *pAsm); |
| GLboolean assemble_EX2(r700_AssemblerBase *pAsm); |
| GLboolean assemble_EXP(r700_AssemblerBase *pAsm); |
| GLboolean assemble_FLR(r700_AssemblerBase *pAsm); |
| GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); |
| GLboolean assemble_FRC(r700_AssemblerBase *pAsm); |
| GLboolean assemble_KIL(r700_AssemblerBase *pAsm); |
| GLboolean assemble_LG2(r700_AssemblerBase *pAsm); |
| GLboolean assemble_LRP(r700_AssemblerBase *pAsm); |
| GLboolean assemble_LOG(r700_AssemblerBase *pAsm); |
| GLboolean assemble_MAD(r700_AssemblerBase *pAsm); |
| GLboolean assemble_LIT(r700_AssemblerBase *pAsm); |
| GLboolean assemble_MAX(r700_AssemblerBase *pAsm); |
| GLboolean assemble_MIN(r700_AssemblerBase *pAsm); |
| GLboolean assemble_MOV(r700_AssemblerBase *pAsm); |
| GLboolean assemble_MUL(r700_AssemblerBase *pAsm); |
| GLboolean assemble_POW(r700_AssemblerBase *pAsm); |
| GLboolean assemble_RCP(r700_AssemblerBase *pAsm); |
| GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); |
| GLboolean assemble_SIN(r700_AssemblerBase *pAsm); |
| GLboolean assemble_SCS(r700_AssemblerBase *pAsm); |
| GLboolean assemble_SGE(r700_AssemblerBase *pAsm); |
| GLboolean assemble_SLT(r700_AssemblerBase *pAsm); |
| GLboolean assemble_STP(r700_AssemblerBase *pAsm); |
| GLboolean assemble_TEX(r700_AssemblerBase *pAsm); |
| GLboolean assemble_XPD(r700_AssemblerBase *pAsm); |
| GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); |
| GLboolean assemble_IF(r700_AssemblerBase *pAsm); |
| GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); |
| |
| GLboolean Process_Export(r700_AssemblerBase* pAsm, |
| GLuint type, |
| GLuint export_starting_index, |
| GLuint export_count, |
| GLuint starting_register_number, |
| GLboolean is_depth_export); |
| GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, |
| BITS depth_channel_select); |
| |
| |
| //Interface |
| GLboolean AssembleInstr(GLuint uiNumberInsts, |
| struct prog_instruction *pILInst, |
| r700_AssemblerBase *pR700AsmCode); |
| GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); |
| GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); |
| |
| int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); |
| GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); |
| |
| #endif //_R700_ASSEMBLER_H_ |