| /************************************************************************** |
| * |
| * Copyright 2011 The Chromium OS authors. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| #include "i915_reg.h" |
| #include "i915_context.h" |
| #include "i915_fpc.h" |
| |
| #include "pipe/p_shader_tokens.h" |
| #include "util/u_math.h" |
| #include "util/u_memory.h" |
| #include "util/u_string.h" |
| #include "tgsi/tgsi_parse.h" |
| #include "tgsi/tgsi_dump.h" |
| #include "tgsi/tgsi_exec.h" |
| |
| struct i915_optimize_context |
| { |
| int first_write[TGSI_EXEC_NUM_TEMPS]; |
| int last_read[TGSI_EXEC_NUM_TEMPS]; |
| }; |
| |
| static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1) |
| { |
| return (s1->Register.File == d1->Register.File && |
| s1->Register.Indirect == d1->Register.Indirect && |
| s1->Register.Dimension == d1->Register.Dimension && |
| s1->Register.Index == d1->Register.Index); |
| } |
| |
| static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2) |
| { |
| return (d1->Register.File == d2->Register.File && |
| d1->Register.Indirect == d2->Register.Indirect && |
| d1->Register.Dimension == d2->Register.Dimension && |
| d1->Register.Index == d2->Register.Index); |
| } |
| |
| static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2) |
| { |
| return (d1->Register.File == d2->Register.File && |
| d1->Register.Indirect == d2->Register.Indirect && |
| d1->Register.Dimension == d2->Register.Dimension && |
| d1->Register.Index == d2->Register.Index && |
| d1->Register.Absolute == d2->Register.Absolute && |
| d1->Register.Negate == d2->Register.Negate); |
| } |
| |
| static const struct { |
| boolean is_texture; |
| boolean commutes; |
| unsigned neutral_element; |
| unsigned num_dst; |
| unsigned num_src; |
| } op_table [TGSI_OPCODE_LAST] = { |
| [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 }, |
| [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 }, |
| [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 }, |
| [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, |
| [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, |
| [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, |
| [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 }, |
| [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 }, |
| [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 }, |
| [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 }, |
| [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 }, |
| [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, |
| [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 }, |
| [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 }, |
| [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, |
| [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 }, |
| [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 }, |
| }; |
| |
| static boolean op_has_dst(unsigned opcode) |
| { |
| return (op_table[opcode].num_dst > 0); |
| } |
| |
| static int op_num_dst(unsigned opcode) |
| { |
| return op_table[opcode].num_dst; |
| } |
| |
| static int op_num_src(unsigned opcode) |
| { |
| return op_table[opcode].num_src; |
| } |
| |
| static boolean op_commutes(unsigned opcode) |
| { |
| return op_table[opcode].commutes; |
| } |
| |
| static unsigned mask_for_unswizzled(int num_components) |
| { |
| unsigned mask = 0; |
| switch(num_components) |
| { |
| case 4: |
| mask |= TGSI_WRITEMASK_W; |
| case 3: |
| mask |= TGSI_WRITEMASK_Z; |
| case 2: |
| mask |= TGSI_WRITEMASK_Y; |
| case 1: |
| mask |= TGSI_WRITEMASK_X; |
| } |
| return mask; |
| } |
| |
| static boolean is_unswizzled(struct i915_full_src_register *r, |
| unsigned write_mask) |
| { |
| if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) |
| return FALSE; |
| if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) |
| return FALSE; |
| if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) |
| return FALSE; |
| if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) |
| return FALSE; |
| return TRUE; |
| } |
| |
| static boolean op_is_texture(unsigned opcode) |
| { |
| return op_table[opcode].is_texture; |
| } |
| |
| static unsigned op_neutral_element(unsigned opcode) |
| { |
| unsigned ne = op_table[opcode].neutral_element; |
| if (!ne) { |
| debug_printf("No neutral element for opcode %d\n",opcode); |
| ne = TGSI_SWIZZLE_ZERO; |
| } |
| return ne; |
| } |
| |
| /* |
| * Sets the swizzle to the neutral element for the operation for the bits |
| * of writemask which are set, swizzle to identity otherwise. |
| */ |
| static void set_neutral_element_swizzle(struct i915_full_src_register *r, |
| unsigned write_mask, |
| unsigned neutral) |
| { |
| if ( write_mask & TGSI_WRITEMASK_X ) |
| r->Register.SwizzleX = neutral; |
| else |
| r->Register.SwizzleX = TGSI_SWIZZLE_X; |
| |
| if ( write_mask & TGSI_WRITEMASK_Y ) |
| r->Register.SwizzleY = neutral; |
| else |
| r->Register.SwizzleY = TGSI_SWIZZLE_Y; |
| |
| if ( write_mask & TGSI_WRITEMASK_Z ) |
| r->Register.SwizzleZ = neutral; |
| else |
| r->Register.SwizzleZ = TGSI_SWIZZLE_Z; |
| |
| if ( write_mask & TGSI_WRITEMASK_W ) |
| r->Register.SwizzleW = neutral; |
| else |
| r->Register.SwizzleW = TGSI_SWIZZLE_W; |
| } |
| |
| static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i) |
| { |
| o->File = i->File; |
| o->Indirect = i->Indirect; |
| o->Dimension = i->Dimension; |
| o->Index = i->Index; |
| o->SwizzleX = i->SwizzleX; |
| o->SwizzleY = i->SwizzleY; |
| o->SwizzleZ = i->SwizzleZ; |
| o->SwizzleW = i->SwizzleW; |
| o->Absolute = i->Absolute; |
| o->Negate = i->Negate; |
| } |
| |
| static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i) |
| { |
| o->File = i->File; |
| o->WriteMask = i->WriteMask; |
| o->Indirect = i->Indirect; |
| o->Dimension = i->Dimension; |
| o->Index = i->Index; |
| } |
| |
| static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i) |
| { |
| memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); |
| memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); |
| |
| copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); |
| |
| copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); |
| copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); |
| copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); |
| } |
| |
| static void copy_token(union i915_full_token *o, union tgsi_full_token *i) |
| { |
| if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) |
| memcpy(o, i, sizeof(*o)); |
| else |
| copy_instruction(&o->FullInstruction, &i->FullInstruction); |
| |
| } |
| |
| static void liveness_mark_written(struct i915_optimize_context *ctx, |
| struct i915_full_dst_register *dst_reg, |
| int pos) |
| { |
| int dst_reg_index; |
| if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) { |
| dst_reg_index = dst_reg->Register.Index; |
| assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); |
| /* dead -> live transition */ |
| if (ctx->first_write[dst_reg_index] != -1) |
| ctx->first_write[dst_reg_index] = pos; |
| } |
| } |
| |
| static void liveness_mark_read(struct i915_optimize_context *ctx, |
| struct i915_full_src_register *src_reg, |
| int pos) |
| { |
| int src_reg_index; |
| if (src_reg->Register.File == TGSI_FILE_TEMPORARY) { |
| src_reg_index = src_reg->Register.Index; |
| assert(src_reg_index < TGSI_EXEC_NUM_TEMPS); |
| /* live -> dead transition */ |
| if (ctx->last_read[src_reg_index] != -1) |
| ctx->last_read[src_reg_index] = pos; |
| } |
| } |
| |
| static void liveness_analysis(struct i915_optimize_context *ctx, |
| struct i915_token_list *tokens) |
| { |
| struct i915_full_dst_register *dst_reg; |
| struct i915_full_src_register *src_reg; |
| union i915_full_token *current; |
| unsigned opcode; |
| int num_dst, num_src; |
| int i = 0; |
| |
| for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) |
| { |
| ctx->first_write[i] = -1; |
| ctx->last_read[i] = -1; |
| } |
| |
| for(i = 0; i < tokens->NumTokens; i++) |
| { |
| current = &tokens->Tokens[i]; |
| |
| if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) |
| continue; |
| |
| opcode = current->FullInstruction.Instruction.Opcode; |
| num_dst = op_num_dst(opcode); |
| |
| switch(num_dst) |
| { |
| case 1: |
| dst_reg = ¤t->FullInstruction.Dst[0]; |
| liveness_mark_written(ctx, dst_reg, i); |
| case 0: |
| break; |
| default: |
| debug_printf("Op %d has %d dst regs\n", opcode, num_dst); |
| break; |
| } |
| } |
| |
| for(i = tokens->NumTokens - 1; i >= 0; i--) |
| { |
| current = &tokens->Tokens[i]; |
| |
| if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) |
| continue; |
| |
| opcode = current->FullInstruction.Instruction.Opcode; |
| num_src = op_num_src(opcode); |
| |
| switch(num_src) |
| { |
| case 3: |
| src_reg = ¤t->FullInstruction.Src[2]; |
| liveness_mark_read(ctx, src_reg, i); |
| case 2: |
| src_reg = ¤t->FullInstruction.Src[1]; |
| liveness_mark_read(ctx, src_reg, i); |
| case 1: |
| src_reg = ¤t->FullInstruction.Src[0]; |
| liveness_mark_read(ctx, src_reg, i); |
| case 0: |
| break; |
| default: |
| debug_printf("Op %d has %d src regs\n", opcode, num_src); |
| break; |
| } |
| } |
| } |
| |
| static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from) |
| { |
| int dst_reg_index = dst_reg->Register.Index; |
| assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); |
| return (from >= ctx->last_read[dst_reg_index]); |
| } |
| |
| /* Returns a mask with the components used for a texture access instruction */ |
| static unsigned i915_tex_mask(union i915_full_token *instr) |
| { |
| unsigned mask; |
| |
| /* Get the number of coords */ |
| mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture)); |
| |
| /* Add the W component if projective */ |
| if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP) |
| mask |= TGSI_WRITEMASK_W; |
| |
| return mask; |
| } |
| |
| static boolean target_is_texture2d(uint tex) |
| { |
| switch (tex) { |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| |
| /* |
| * Optimize away useless indirect texture reads: |
| * MOV TEMP[0].xy, IN[0].xyyy |
| * TEX TEMP[1], TEMP[0], SAMP[0], 2D |
| * into: |
| * TEX TEMP[1], IN[0], SAMP[0], 2D |
| * |
| * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/.. |
| */ |
| static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx, |
| struct i915_token_list *tokens, |
| int index) |
| { |
| union i915_full_token *current = &tokens->Tokens[index - 1]; |
| union i915_full_token *next = &tokens->Tokens[index]; |
| |
| if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| op_is_texture(next->FullInstruction.Instruction.Opcode) && |
| target_is_texture2d(next->FullInstruction.Texture.Texture) && |
| same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && |
| is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) && |
| unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) |
| { |
| memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register)); |
| current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
| } |
| } |
| |
| /* |
| * Optimize away things like: |
| * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0]) |
| * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0]) |
| * into: |
| * NOP |
| * MOV OUT[0].xyw, TEMP[1].xyww |
| */ |
| static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next) |
| { |
| struct i915_full_src_register *src_reg1, *src_reg2; |
| struct i915_full_dst_register *dst_reg1, *dst_reg2; |
| unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w; |
| |
| if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && |
| same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && |
| same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && |
| !same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) |
| { |
| src_reg1 = ¤t->FullInstruction.Src[0]; |
| dst_reg1 = ¤t->FullInstruction.Dst[0]; |
| src_reg2 = &next->FullInstruction.Src[0]; |
| dst_reg2 = &next->FullInstruction.Dst[0]; |
| |
| /* Start with swizzles from the first mov */ |
| swizzle_x = src_reg1->Register.SwizzleX; |
| swizzle_y = src_reg1->Register.SwizzleY; |
| swizzle_z = src_reg1->Register.SwizzleZ; |
| swizzle_w = src_reg1->Register.SwizzleW; |
| |
| /* Pile the second mov on top */ |
| if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X) |
| swizzle_x = src_reg2->Register.SwizzleX; |
| if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y) |
| swizzle_y = src_reg2->Register.SwizzleY; |
| if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z) |
| swizzle_z = src_reg2->Register.SwizzleZ; |
| if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W) |
| swizzle_w = src_reg2->Register.SwizzleW; |
| |
| dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask; |
| src_reg2->Register.SwizzleX = swizzle_x; |
| src_reg2->Register.SwizzleY = swizzle_y; |
| src_reg2->Register.SwizzleZ = swizzle_z; |
| src_reg2->Register.SwizzleW = swizzle_w; |
| |
| current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
| |
| return; |
| } |
| } |
| |
| /* |
| * Optimize away things like: |
| * MUL OUT[0].xyz, TEMP[1], TEMP[2] |
| * MOV OUT[0].w, TEMP[2] |
| * into: |
| * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] |
| * This is useful for optimizing texenv. |
| */ |
| static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next) |
| { |
| if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| op_commutes(current->FullInstruction.Instruction.Opcode) && |
| current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && |
| next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && |
| same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && |
| !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && |
| is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && |
| is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && |
| is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) |
| { |
| next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
| |
| set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); |
| set_neutral_element_swizzle(¤t->FullInstruction.Src[0], |
| next->FullInstruction.Dst[0].Register.WriteMask, |
| op_neutral_element(current->FullInstruction.Instruction.Opcode)); |
| |
| current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | |
| next->FullInstruction.Dst[0].Register.WriteMask; |
| return; |
| } |
| |
| if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| op_commutes(current->FullInstruction.Instruction.Opcode) && |
| current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && |
| next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && |
| same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && |
| !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && |
| is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && |
| is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && |
| is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) |
| { |
| next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
| |
| set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); |
| set_neutral_element_swizzle(¤t->FullInstruction.Src[1], |
| next->FullInstruction.Dst[0].Register.WriteMask, |
| op_neutral_element(current->FullInstruction.Instruction.Opcode)); |
| |
| current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | |
| next->FullInstruction.Dst[0].Register.WriteMask; |
| return; |
| } |
| } |
| |
| /* |
| * Optimize away things like: |
| * MOV TEMP[0].xyz TEMP[0].xyzx |
| * into: |
| * NOP |
| */ |
| static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) |
| { |
| union i915_full_token current; |
| copy_token(¤t , tgsi_current); |
| if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| op_has_dst(current.FullInstruction.Instruction.Opcode) && |
| !current.FullInstruction.Instruction.Saturate && |
| current.FullInstruction.Src[0].Register.Absolute == 0 && |
| current.FullInstruction.Src[0].Register.Negate == 0 && |
| is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && |
| same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) ) |
| { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| /* |
| * Optimize away things like: |
| * *** TEMP[0], TEMP[1], TEMP[2] |
| * MOV OUT[0] TEMP[0] |
| * into: |
| * *** OUT[0], TEMP[1], TEMP[2] |
| */ |
| static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx, |
| struct i915_token_list *tokens, |
| int index) |
| { |
| union i915_full_token *current = &tokens->Tokens[index - 1]; |
| union i915_full_token *next = &tokens->Tokens[index]; |
| |
| // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); |
| if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
| next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
| op_has_dst(current->FullInstruction.Instruction.Opcode) && |
| !next->FullInstruction.Instruction.Saturate && |
| next->FullInstruction.Src[0].Register.Absolute == 0 && |
| next->FullInstruction.Src[0].Register.Negate == 0 && |
| unused_from(ctx, ¤t->FullInstruction.Dst[0], index) && |
| current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW && |
| is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && |
| current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && |
| same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) |
| { |
| next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
| |
| current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; |
| return; |
| } |
| } |
| |
| struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) |
| { |
| struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); |
| struct tgsi_parse_context parse; |
| struct i915_optimize_context *ctx; |
| int i = 0; |
| |
| ctx = malloc(sizeof(*ctx)); |
| |
| out_tokens->NumTokens = 0; |
| |
| /* Count the tokens */ |
| tgsi_parse_init( &parse, tokens ); |
| while( !tgsi_parse_end_of_tokens( &parse ) ) { |
| tgsi_parse_token( &parse ); |
| out_tokens->NumTokens++; |
| } |
| tgsi_parse_free (&parse); |
| |
| /* Allocate our tokens */ |
| out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); |
| |
| tgsi_parse_init( &parse, tokens ); |
| while( !tgsi_parse_end_of_tokens( &parse ) ) { |
| tgsi_parse_token( &parse ); |
| |
| if (i915_fpc_useless_mov(&parse.FullToken)) { |
| out_tokens->NumTokens--; |
| continue; |
| } |
| |
| copy_token(&out_tokens->Tokens[i] , &parse.FullToken); |
| |
| i++; |
| } |
| tgsi_parse_free (&parse); |
| |
| liveness_analysis(ctx, out_tokens); |
| |
| i = 1; |
| while( i < out_tokens->NumTokens) { |
| i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i); |
| i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); |
| i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); |
| i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i); |
| i++; |
| } |
| |
| free(ctx); |
| |
| return out_tokens; |
| } |
| |
| void i915_optimize_free(struct i915_token_list *tokens) |
| { |
| free(tokens->Tokens); |
| free(tokens); |
| } |
| |
| |