| /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
| |
| /* |
| * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| * |
| * Authors: |
| * Rob Clark <robclark@freedesktop.org> |
| */ |
| |
| #include "pipe/p_state.h" |
| #include "util/u_string.h" |
| #include "util/u_memory.h" |
| #include "util/u_inlines.h" |
| #include "tgsi/tgsi_parse.h" |
| #include "tgsi/tgsi_ureg.h" |
| #include "tgsi/tgsi_info.h" |
| #include "tgsi/tgsi_strings.h" |
| #include "tgsi/tgsi_dump.h" |
| |
| #include "fd2_compiler.h" |
| #include "fd2_program.h" |
| #include "fd2_util.h" |
| |
| #include "instr-a2xx.h" |
| #include "ir-a2xx.h" |
| |
/* Per-compile state carried through both parse passes. */
struct fd2_compile_context {
	struct fd_program_stateobj *prog;
	struct fd2_shader_stateobj *so;

	struct tgsi_parse_context parser;
	unsigned type;   /* shader stage: PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT */

	/* predicate stack: */
	int pred_depth;
	enum ir2_pred pred_stack[8];

	/* Internal-Temporary and Predicate register assignment:
	 *
	 * Some TGSI instructions which translate into multiple actual
	 * instructions need one or more temporary registers, which are not
	 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
	 * And some instructions (texture fetch) cannot write directly to
	 * output registers.  We could be more clever and re-use dst or a
	 * src register in some cases.  But for now don't try to be clever.
	 * Eventually we should implement an optimization pass that re-
	 * juggles the register usage and gets rid of unneeded temporaries.
	 *
	 * The predicate register must be valid across multiple TGSI
	 * instructions, but internal temporary's do not.  For this reason,
	 * once the predicate register is requested, until it is no longer
	 * needed, it gets the first register slot after the TGSI
	 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
	 * internal temporaries get the register slots above this.
	 */

	int pred_reg;             /* temp slot holding predicate value, or -1 if unassigned */
	int num_internal_temps;   /* internal temps handed out for the current TGSI instr */

	/* high-water register count per TGSI register file: */
	uint8_t num_regs[TGSI_FILE_COUNT];

	/* maps input register idx to prog->export_linkage idx: */
	uint8_t input_export_idx[64];

	/* maps output register idx to prog->export_linkage idx: */
	uint8_t output_export_idx[64];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	// TODO we can skip emit exports in the VS that the FS doesn't need..
	// and get rid perhaps of num_param..
	unsigned num_position, num_param;
	unsigned position, psize;   /* output reg idx of POSITION/PSIZE, ~0 if absent */

	/* bitmask of GPRs written by a fetch instr that still need the
	 * sync bit set on their first ALU read:
	 */
	uint64_t need_sync;

	/* current exec CF instruction */
	struct ir2_cf *cf;
};
| |
| static int |
| semantic_idx(struct tgsi_declaration_semantic *semantic) |
| { |
| int idx = semantic->Name; |
| if (idx == TGSI_SEMANTIC_GENERIC) |
| idx = TGSI_SEMANTIC_COUNT + semantic->Index; |
| return idx; |
| } |
| |
| /* assign/get the input/export register # for given semantic idx as |
| * returned by semantic_idx(): |
| */ |
| static int |
| export_linkage(struct fd2_compile_context *ctx, int idx) |
| { |
| struct fd_program_stateobj *prog = ctx->prog; |
| |
| /* if first time we've seen this export, assign the next available slot: */ |
| if (prog->export_linkage[idx] == 0xff) |
| prog->export_linkage[idx] = prog->num_exports++; |
| |
| return prog->export_linkage[idx]; |
| } |
| |
/* First pass over the token stream: gather declarations (register
 * counts, semantic -> export mappings) and TGSI-declared immediates,
 * then rewind the parser so compile_instructions() can run the real
 * translation pass.  Returns TGSI_PARSE_OK on success.
 */
static unsigned
compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
		struct fd2_shader_stateobj *so)
{
	unsigned ret;

	ctx->prog = prog;
	ctx->so = so;
	ctx->cf = NULL;
	ctx->pred_depth = 0;

	ret = tgsi_parse_init(&ctx->parser, so->tokens);
	if (ret != TGSI_PARSE_OK)
		return ret;

	ctx->type = ctx->parser.FullHeader.Processor.Processor;
	ctx->position = ~0;   /* ~0 == "not declared" */
	ctx->psize = ~0;
	ctx->num_position = 0;
	ctx->num_param = 0;
	ctx->need_sync = 0;
	ctx->immediate_idx = 0;
	ctx->pred_reg = -1;
	ctx->num_internal_temps = 0;

	memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
	memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
	memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));

	/* do first pass to extract declarations: */
	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				unsigned name = decl->Semantic.Name;

				assert(decl->Declaration.Semantic);  // TODO is this ever not true?

				ctx->output_export_idx[decl->Range.First] =
						semantic_idx(&decl->Semantic);

				if (ctx->type == PIPE_SHADER_VERTEX) {
					switch (name) {
					case TGSI_SEMANTIC_POSITION:
						ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
						ctx->num_position++;
						break;
					case TGSI_SEMANTIC_PSIZE:
						ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
						ctx->num_position++;
						break;
					case TGSI_SEMANTIC_COLOR:
					case TGSI_SEMANTIC_GENERIC:
						ctx->num_param++;
						break;
					default:
						DBG("unknown VS semantic name: %s",
								tgsi_semantic_names[name]);
						assert(0);
					}
				} else {
					switch (name) {
					case TGSI_SEMANTIC_COLOR:
					case TGSI_SEMANTIC_GENERIC:
						ctx->num_param++;
						break;
					default:
						DBG("unknown PS semantic name: %s",
								tgsi_semantic_names[name]);
						assert(0);
					}
				}
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				ctx->input_export_idx[decl->Range.First] =
						semantic_idx(&decl->Semantic);
			}
			/* track high-water register count per file: */
			ctx->num_regs[decl->Declaration.File] =
					MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->num_immediates++;
			/* copy the full vec4 (4 x 32-bit) of immediate data: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		default:
			break;
		}
	}

	/* TGSI generated immediates are always entire vec4's, ones we
	 * generate internally are not:
	 */
	ctx->immediate_idx = ctx->so->num_immediates * 4;

	/* immediates live in the const file directly after the uniforms: */
	ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];

	tgsi_parse_free(&ctx->parser);

	/* rewind for the second pass: */
	return tgsi_parse_init(&ctx->parser, so->tokens);
}
| |
/* Release the parser state created by compile_init(). */
static void
compile_free(struct fd2_compile_context *ctx)
{
	tgsi_parse_free(&ctx->parser);
}
| |
| static struct ir2_cf * |
| next_exec_cf(struct fd2_compile_context *ctx) |
| { |
| struct ir2_cf *cf = ctx->cf; |
| if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) |
| ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC); |
| return cf; |
| } |
| |
| static void |
| compile_vtx_fetch(struct fd2_compile_context *ctx) |
| { |
| struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; |
| int i; |
| for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { |
| struct ir2_instruction *instr = ir2_instr_create( |
| next_exec_cf(ctx), IR2_FETCH); |
| instr->fetch.opc = VTX_FETCH; |
| |
| ctx->need_sync |= 1 << (i+1); |
| |
| ir2_reg_create(instr, i+1, "xyzw", 0); |
| ir2_reg_create(instr, 0, "x", 0); |
| |
| if (i == 0) |
| instr->sync = true; |
| |
| vfetch_instrs[i] = instr; |
| } |
| ctx->so->num_vfetch_instrs = i; |
| ctx->cf = NULL; |
| } |
| |
| /* |
| * For vertex shaders (VS): |
| * --- ------ ------------- |
| * |
| * Inputs: R1-R(num_input) |
| * Constants: C0-C(num_const-1) |
| * Immediates: C(num_const)-C(num_const+num_imm-1) |
| * Outputs: export0-export(n) and export62, export63 |
| * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) |
| * Temps: R(num_input+1)-R(num_input+num_temps) |
| * |
| * R0 could be clobbered after the vertex fetch instructions.. so we |
| * could use it for one of the temporaries. |
| * |
| * TODO: maybe the vertex fetch part could fetch first input into R0 as |
| * the last vtx fetch instruction, which would let us use the same |
| * register layout in either case.. although this is not what the blob |
| * compiler does. |
| * |
| * |
| * For frag shaders (PS): |
| * --- ---- ------------- |
| * |
| * Inputs: R0-R(num_input-1) |
| * Constants: same as VS |
| * Immediates: same as VS |
| * Outputs: export0-export(num_outputs) |
| * Temps: R(num_input)-R(num_input+num_temps-1) |
| * |
 * In either case, immediates are appended to the constants
| * (uniforms). |
| * |
| */ |
| |
| static unsigned |
| get_temp_gpr(struct fd2_compile_context *ctx, int idx) |
| { |
| unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; |
| if (ctx->type == PIPE_SHADER_VERTEX) |
| num++; |
| return num; |
| } |
| |
| static struct ir2_register * |
| add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, |
| const struct tgsi_dst_register *dst) |
| { |
| unsigned flags = 0, num = 0; |
| char swiz[5]; |
| |
| switch (dst->File) { |
| case TGSI_FILE_OUTPUT: |
| flags |= IR2_REG_EXPORT; |
| if (ctx->type == PIPE_SHADER_VERTEX) { |
| if (dst->Index == ctx->position) { |
| num = 62; |
| } else if (dst->Index == ctx->psize) { |
| num = 63; |
| } else { |
| num = export_linkage(ctx, |
| ctx->output_export_idx[dst->Index]); |
| } |
| } else { |
| num = dst->Index; |
| } |
| break; |
| case TGSI_FILE_TEMPORARY: |
| num = get_temp_gpr(ctx, dst->Index); |
| break; |
| default: |
| DBG("unsupported dst register file: %s", |
| tgsi_file_name(dst->File)); |
| assert(0); |
| break; |
| } |
| |
| swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; |
| swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; |
| swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; |
| swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_'; |
| swiz[4] = '\0'; |
| |
| return ir2_reg_create(alu, num, swiz, flags); |
| } |
| |
| static struct ir2_register * |
| add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, |
| const struct tgsi_src_register *src) |
| { |
| static const char swiz_vals[] = { |
| 'x', 'y', 'z', 'w', |
| }; |
| char swiz[5]; |
| unsigned flags = 0, num = 0; |
| |
| switch (src->File) { |
| case TGSI_FILE_CONSTANT: |
| num = src->Index; |
| flags |= IR2_REG_CONST; |
| break; |
| case TGSI_FILE_INPUT: |
| if (ctx->type == PIPE_SHADER_VERTEX) { |
| num = src->Index + 1; |
| } else { |
| num = export_linkage(ctx, |
| ctx->input_export_idx[src->Index]); |
| } |
| break; |
| case TGSI_FILE_TEMPORARY: |
| num = get_temp_gpr(ctx, src->Index); |
| break; |
| case TGSI_FILE_IMMEDIATE: |
| num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; |
| flags |= IR2_REG_CONST; |
| break; |
| default: |
| DBG("unsupported src register file: %s", |
| tgsi_file_name(src->File)); |
| assert(0); |
| break; |
| } |
| |
| if (src->Absolute) |
| flags |= IR2_REG_ABS; |
| if (src->Negate) |
| flags |= IR2_REG_NEGATE; |
| |
| swiz[0] = swiz_vals[src->SwizzleX]; |
| swiz[1] = swiz_vals[src->SwizzleY]; |
| swiz[2] = swiz_vals[src->SwizzleZ]; |
| swiz[3] = swiz_vals[src->SwizzleW]; |
| swiz[4] = '\0'; |
| |
| if ((ctx->need_sync & ((uint64_t)1 << num)) && |
| !(flags & IR2_REG_CONST)) { |
| alu->sync = true; |
| ctx->need_sync &= ~((uint64_t)1 << num); |
| } |
| |
| return ir2_reg_create(alu, num, swiz, flags); |
| } |
| |
/* Propagate the TGSI saturate modifier to the vector op's clamp bit. */
static void
add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	if (inst->Instruction.Saturate) {
		alu->alu.vector_clamp = true;
	}
}
| |
/* Propagate the TGSI saturate modifier to the scalar op's clamp bit. */
static void
add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	if (inst->Instruction.Saturate) {
		alu->alu.scalar_clamp = true;
	}
}
| |
/* Emit regs for a single-src vector op.  The hw vector slot always
 * takes two sources, so the one TGSI src is added twice (e.g. MAXv
 * a,a is used as a move -- see TGSI_OPCODE_MOV below).
 */
static void
add_regs_vector_1(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	assert(inst->Instruction.NumSrcRegs == 1);
	assert(inst->Instruction.NumDstRegs == 1);

	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
	add_src_reg(ctx, alu, &inst->Src[0].Register);
	add_src_reg(ctx, alu, &inst->Src[0].Register);
	add_vector_clamp(inst, alu);
}
| |
/* Emit dst + two srcs for a two-operand vector op. */
static void
add_regs_vector_2(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	assert(inst->Instruction.NumSrcRegs == 2);
	assert(inst->Instruction.NumDstRegs == 1);

	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
	add_src_reg(ctx, alu, &inst->Src[0].Register);
	add_src_reg(ctx, alu, &inst->Src[1].Register);
	add_vector_clamp(inst, alu);
}
| |
/* Emit dst + three srcs for a three-operand vector op (MULADDv,
 * CNDGTEv), with the src order rearranged for the ir2 operand layout.
 */
static void
add_regs_vector_3(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	assert(inst->Instruction.NumSrcRegs == 3);
	assert(inst->Instruction.NumDstRegs == 1);

	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
	/* maybe should re-arrange the syntax some day, but
	 * in assembler/disassembler and what ir.c expects
	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
	 */
	add_src_reg(ctx, alu, &inst->Src[2].Register);
	add_src_reg(ctx, alu, &inst->Src[0].Register);
	add_src_reg(ctx, alu, &inst->Src[1].Register);
	add_vector_clamp(inst, alu);
}
| |
/* Fill the (unused) vector slot of a scalar-only instruction with
 * placeholder regs, since the reg order on the instruction is fixed.
 */
static void
add_regs_dummy_vector(struct ir2_instruction *alu)
{
	/* create dummy, non-written vector dst/src regs
	 * for unused vector instr slot:
	 */
	ir2_reg_create(alu, 0, "____", 0); /* vector dst */
	ir2_reg_create(alu, 0, NULL, 0);   /* vector src1 */
	ir2_reg_create(alu, 0, NULL, 0);   /* vector src2 */
}
| |
/* Emit regs for a single-src scalar op: dummy vector regs first (they
 * always precede the scalar ones), then the real dst/src.
 */
static void
add_regs_scalar_1(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
	assert(inst->Instruction.NumSrcRegs == 1);
	assert(inst->Instruction.NumDstRegs == 1);

	add_regs_dummy_vector(alu);

	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
	add_src_reg(ctx, alu, &inst->Src[0].Register);
	add_scalar_clamp(inst, alu);
}
| |
| /* |
| * Helpers for TGSI instructions that don't map to a single shader instr: |
| */ |
| |
/* Initialize a src operand referring to the same register a dst
 * operand writes, with identity swizzle and no abs/negate modifiers.
 */
static void
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
{
	src->File      = dst->File;
	src->Indirect  = dst->Indirect;
	src->Dimension = dst->Dimension;
	src->Index     = dst->Index;
	src->Absolute  = 0;
	src->Negate    = 0;
	src->SwizzleX  = TGSI_SWIZZLE_X;
	src->SwizzleY  = TGSI_SWIZZLE_Y;
	src->SwizzleZ  = TGSI_SWIZZLE_Z;
	src->SwizzleW  = TGSI_SWIZZLE_W;
}
| |
| /* Get internal-temp src/dst to use for a sequence of instructions |
| * generated by a single TGSI op. |
| */ |
| static void |
| get_internal_temp(struct fd2_compile_context *ctx, |
| struct tgsi_dst_register *tmp_dst, |
| struct tgsi_src_register *tmp_src) |
| { |
| int n; |
| |
| tmp_dst->File = TGSI_FILE_TEMPORARY; |
| tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; |
| tmp_dst->Indirect = 0; |
| tmp_dst->Dimension = 0; |
| |
| /* assign next temporary: */ |
| n = ctx->num_internal_temps++; |
| if (ctx->pred_reg != -1) |
| n++; |
| |
| tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; |
| |
| src_from_dst(tmp_src, tmp_dst); |
| } |
| |
| static void |
| get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, |
| struct tgsi_src_register *src) |
| { |
| assert(ctx->pred_reg != -1); |
| |
| dst->File = TGSI_FILE_TEMPORARY; |
| dst->WriteMask = TGSI_WRITEMASK_W; |
| dst->Indirect = 0; |
| dst->Dimension = 0; |
| dst->Index = get_temp_gpr(ctx, ctx->pred_reg); |
| |
| if (src) { |
| src_from_dst(src, dst); |
| src->SwizzleX = TGSI_SWIZZLE_W; |
| src->SwizzleY = TGSI_SWIZZLE_W; |
| src->SwizzleZ = TGSI_SWIZZLE_W; |
| src->SwizzleW = TGSI_SWIZZLE_W; |
| } |
| } |
| |
/* Begin a predicated block (TGSI IF): evaluate 'src' into the
 * predicate register and push the current predicate state onto
 * ctx->pred_stack so pop_predicate() can restore it.
 */
static void
push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
{
	struct ir2_instruction *alu;
	struct tgsi_dst_register pred_dst;

	/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
	 * themselves:
	 */
	ctx->cf = NULL;

	if (ctx->pred_depth == 0) {
		/* assign predicate register: */
		ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];

		get_predicate(ctx, &pred_dst, NULL);

		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
		add_regs_dummy_vector(alu);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, src);
	} else {
		struct tgsi_src_register pred_src;

		get_predicate(ctx, &pred_dst, &pred_src);

		/* nested IF: combine the new condition with the current
		 * predicate value (pred * src):
		 */
		alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, &pred_src);
		add_src_reg(ctx, alu, src);

		// XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
		// sure src reg is valid if it was calculated with a predicate
		// condition..
		alu->pred = IR2_PRED_NONE;
	}

	/* save previous pred state to restore in pop_predicate(): */
	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;

	ctx->cf = NULL;
}
| |
/* End a predicated block (TGSI ENDIF): restore the saved predicate
 * state; if still inside an outer IF, pop the predicate value, else
 * release the predicate register entirely.
 */
static void
pop_predicate(struct fd2_compile_context *ctx)
{
	/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
	 * themselves:
	 */
	ctx->cf = NULL;

	/* restore previous predicate state: */
	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];

	if (ctx->pred_depth != 0) {
		struct ir2_instruction *alu;
		struct tgsi_dst_register pred_dst;
		struct tgsi_src_register pred_src;

		get_predicate(ctx, &pred_dst, &pred_src);

		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
		add_regs_dummy_vector(alu);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, &pred_src);
		/* the pop itself must execute unconditionally: */
		alu->pred = IR2_PRED_NONE;
	} else {
		/* predicate register no longer needed: */
		ctx->pred_reg = -1;
	}

	ctx->cf = NULL;
}
| |
| static void |
| get_immediate(struct fd2_compile_context *ctx, |
| struct tgsi_src_register *reg, uint32_t val) |
| { |
| unsigned neg, swiz, idx, i; |
| /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ |
| static const unsigned swiz2tgsi[] = { |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, |
| }; |
| |
| for (i = 0; i < ctx->immediate_idx; i++) { |
| swiz = i % 4; |
| idx = i / 4; |
| |
| if (ctx->so->immediates[idx].val[swiz] == val) { |
| neg = 0; |
| break; |
| } |
| |
| if (ctx->so->immediates[idx].val[swiz] == -val) { |
| neg = 1; |
| break; |
| } |
| } |
| |
| if (i == ctx->immediate_idx) { |
| /* need to generate a new immediate: */ |
| swiz = i % 4; |
| idx = i / 4; |
| neg = 0; |
| ctx->so->immediates[idx].val[swiz] = val; |
| ctx->so->num_immediates = idx + 1; |
| ctx->immediate_idx++; |
| } |
| |
| reg->File = TGSI_FILE_IMMEDIATE; |
| reg->Indirect = 0; |
| reg->Dimension = 0; |
| reg->Index = idx; |
| reg->Absolute = 0; |
| reg->Negate = neg; |
| reg->SwizzleX = swiz2tgsi[swiz]; |
| reg->SwizzleY = swiz2tgsi[swiz]; |
| reg->SwizzleZ = swiz2tgsi[swiz]; |
| reg->SwizzleW = swiz2tgsi[swiz]; |
| } |
| |
/* POW(a,b) = EXP2(b * LOG2(a)) */
static void
translate_pow(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register tmp_src;
	struct ir2_instruction *alu;

	get_internal_temp(ctx, &tmp_dst, &tmp_src);

	/* tmp = log2(a): */
	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
	add_regs_dummy_vector(alu);
	add_dst_reg(ctx, alu, &tmp_dst);
	add_src_reg(ctx, alu, &inst->Src[0].Register);

	/* tmp = tmp * b: */
	alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
	add_dst_reg(ctx, alu, &tmp_dst);
	add_src_reg(ctx, alu, &tmp_src);
	add_src_reg(ctx, alu, &inst->Src[1].Register);

	/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
	 * coded to take their input from the w component, so route the
	 * written channel into SwizzleW:
	 */
	switch(inst->Dst[0].Register.WriteMask) {
	case TGSI_WRITEMASK_X:
		tmp_src.SwizzleW = TGSI_SWIZZLE_X;
		break;
	case TGSI_WRITEMASK_Y:
		tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
		break;
	case TGSI_WRITEMASK_Z:
		tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
		break;
	case TGSI_WRITEMASK_W:
		tmp_src.SwizzleW = TGSI_SWIZZLE_W;
		break;
	default:
		/* only single-channel writemasks are handled here */
		DBG("invalid writemask!");
		assert(0);
		break;
	}

	/* dst = exp2(tmp): */
	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
	add_regs_dummy_vector(alu);
	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
	add_src_reg(ctx, alu, &tmp_src);
	add_scalar_clamp(inst, alu);
}
| |
| static void |
| translate_tex(struct fd2_compile_context *ctx, |
| struct tgsi_full_instruction *inst, unsigned opc) |
| { |
| struct ir2_instruction *instr; |
| struct ir2_register *reg; |
| struct tgsi_dst_register tmp_dst; |
| struct tgsi_src_register tmp_src; |
| const struct tgsi_src_register *coord; |
| bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || |
| inst->Instruction.Saturate; |
| int idx; |
| |
| if (using_temp || (opc == TGSI_OPCODE_TXP)) |
| get_internal_temp(ctx, &tmp_dst, &tmp_src); |
| |
| if (opc == TGSI_OPCODE_TXP) { |
| static const char *swiz[] = { |
| [TGSI_SWIZZLE_X] = "xxxx", |
| [TGSI_SWIZZLE_Y] = "yyyy", |
| [TGSI_SWIZZLE_Z] = "zzzz", |
| [TGSI_SWIZZLE_W] = "wwww", |
| }; |
| |
| /* TXP - Projective Texture Lookup: |
| * |
| * coord.x = src0.x / src.w |
| * coord.y = src0.y / src.w |
| * coord.z = src0.z / src.w |
| * coord.w = src0.w |
| * bias = 0.0 |
| * |
| * dst = texture_sample(unit, coord, bias) |
| */ |
| instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); |
| |
| /* MAXv: */ |
| add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; |
| add_src_reg(ctx, instr, &inst->Src[0].Register); |
| add_src_reg(ctx, instr, &inst->Src[0].Register); |
| |
| /* RECIP_IEEE: */ |
| add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; |
| add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = |
| swiz[inst->Src[0].Register.SwizzleW]; |
| |
| instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
| add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; |
| add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; |
| add_src_reg(ctx, instr, &inst->Src[0].Register); |
| |
| coord = &tmp_src; |
| } else { |
| coord = &inst->Src[0].Register; |
| } |
| |
| instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH); |
| instr->fetch.opc = TEX_FETCH; |
| instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); |
| assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? |
| |
| /* save off the tex fetch to be patched later with correct const_idx: */ |
| idx = ctx->so->num_tfetch_instrs++; |
| ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; |
| ctx->so->tfetch_instrs[idx].instr = instr; |
| |
| add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); |
| reg = add_src_reg(ctx, instr, coord); |
| |
| /* blob compiler always sets 3rd component to same as 1st for 2d: */ |
| if (inst->Texture.Texture == TGSI_TEXTURE_2D) |
| reg->swizzle[2] = reg->swizzle[0]; |
| |
| /* dst register needs to be marked for sync: */ |
| ctx->need_sync |= 1 << instr->regs[0]->num; |
| |
| /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */ |
| instr->sync = true; |
| |
| if (using_temp) { |
| /* texture fetch can't write directly to export, so if tgsi |
| * is telling us the dst register is in output file, we load |
| * the texture to a temp and the use ALU instruction to move |
| * to output |
| */ |
| instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); |
| |
| add_dst_reg(ctx, instr, &inst->Dst[0].Register); |
| add_src_reg(ctx, instr, &tmp_src); |
| add_src_reg(ctx, instr, &tmp_src); |
| add_vector_clamp(inst, instr); |
| } |
| } |
| |
/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
static void
translate_sge_slt(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst, unsigned opc)
{
	struct ir2_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register tmp_src;
	struct tgsi_src_register tmp_const;
	float c0, c1;

	switch (opc) {
	default:
		assert(0);
		/* fallthrough */
	case TGSI_OPCODE_SGE:
		c0 = 1.0;
		c1 = 0.0;
		break;
	case TGSI_OPCODE_SLT:
		c0 = 0.0;
		c1 = 1.0;
		break;
	}

	get_internal_temp(ctx, &tmp_dst, &tmp_src);

	/* tmp = b - a: */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst);
	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
	add_src_reg(ctx, instr, &inst->Src[1].Register);

	/* dst = (tmp >= 0) ? c0 : c1: */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
	/* maybe should re-arrange the syntax some day, but
	 * in assembler/disassembler and what ir.c expects
	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
	 */
	get_immediate(ctx, &tmp_const, fui(c0));
	add_src_reg(ctx, instr, &tmp_const);
	add_src_reg(ctx, instr, &tmp_src);
	get_immediate(ctx, &tmp_const, fui(c1));
	add_src_reg(ctx, instr, &tmp_const);
}
| |
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
static void
translate_lrp(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst,
		unsigned opc)
{
	struct ir2_instruction *instr;
	struct tgsi_dst_register tmp_dst1, tmp_dst2;
	struct tgsi_src_register tmp_src1, tmp_src2;
	struct tgsi_src_register tmp_const;

	/* two temps: one for each product term */
	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);

	get_immediate(ctx, &tmp_const, fui(1.0));

	/* tmp1 = (a * b) */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst1);
	add_src_reg(ctx, instr, &inst->Src[0].Register);
	add_src_reg(ctx, instr, &inst->Src[1].Register);

	/* tmp2 = (1 - a) */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst2);
	add_src_reg(ctx, instr, &tmp_const);
	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;

	/* tmp2 = tmp2 * c */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst2);
	add_src_reg(ctx, instr, &tmp_src2);
	add_src_reg(ctx, instr, &inst->Src[2].Register);

	/* dst = tmp1 + tmp2 */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
	add_src_reg(ctx, instr, &tmp_src1);
	add_src_reg(ctx, instr, &tmp_src2);
}
| |
/* SIN/COS with argument range-reduction:
 *
 *   t   = frac(x * (1/2pi) + 0.5)   (0.159155 ~= 1/2pi)
 *   arg = t * 2pi - pi              -> arg in [-pi, pi)
 *   dst = SIN(arg) or COS(arg)
 */
static void
translate_trig(struct fd2_compile_context *ctx,
		struct tgsi_full_instruction *inst,
		unsigned opc)
{
	struct ir2_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register tmp_src;
	struct tgsi_src_register tmp_const;
	instr_scalar_opc_t op;

	switch (opc) {
	default:
		assert(0);
		/* fallthrough */
	case TGSI_OPCODE_SIN:
		op = SIN;
		break;
	case TGSI_OPCODE_COS:
		op = COS;
		break;
	}

	get_internal_temp(ctx, &tmp_dst, &tmp_src);

	/* only need one channel for the reduced argument: */
	tmp_dst.WriteMask = TGSI_WRITEMASK_X;
	tmp_src.SwizzleX = tmp_src.SwizzleY =
			tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;

	/* maybe should re-arrange the syntax some day, but
	 * in assembler/disassembler and what ir.c expects
	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
	 */
	/* tmp = 0.5 + x * (1/2pi): */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst);
	get_immediate(ctx, &tmp_const, fui(0.5));
	add_src_reg(ctx, instr, &tmp_const);
	add_src_reg(ctx, instr, &inst->Src[0].Register);
	get_immediate(ctx, &tmp_const, fui(0.159155));
	add_src_reg(ctx, instr, &tmp_const);

	/* tmp = frac(tmp): */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst);
	add_src_reg(ctx, instr, &tmp_src);
	add_src_reg(ctx, instr, &tmp_src);

	/* tmp = -pi + tmp * 2pi: */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
	add_dst_reg(ctx, instr, &tmp_dst);
	get_immediate(ctx, &tmp_const, fui(-3.141593));
	add_src_reg(ctx, instr, &tmp_const);
	add_src_reg(ctx, instr, &tmp_src);
	get_immediate(ctx, &tmp_const, fui(6.283185));
	add_src_reg(ctx, instr, &tmp_const);

	/* dst = SIN/COS(tmp): */
	instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op);
	add_regs_dummy_vector(instr);
	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
	add_src_reg(ctx, instr, &tmp_src);
}
| |
| /* |
| * Main part of compiler/translator: |
| */ |
| |
| static void |
| translate_instruction(struct fd2_compile_context *ctx, |
| struct tgsi_full_instruction *inst) |
| { |
| unsigned opc = inst->Instruction.Opcode; |
| struct ir2_instruction *instr; |
| static struct ir2_cf *cf; |
| |
| if (opc == TGSI_OPCODE_END) |
| return; |
| |
| if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { |
| unsigned num = inst->Dst[0].Register.Index; |
| /* seems like we need to ensure that position vs param/pixel |
| * exports don't end up in the same EXEC clause.. easy way |
| * to do this is force a new EXEC clause on first appearance |
| * of an position or param/pixel export. |
| */ |
| if ((num == ctx->position) || (num == ctx->psize)) { |
| if (ctx->num_position > 0) { |
| ctx->cf = NULL; |
| ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION, |
| ctx->num_position - 1); |
| ctx->num_position = 0; |
| } |
| } else { |
| if (ctx->num_param > 0) { |
| ctx->cf = NULL; |
| ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, |
| ctx->num_param - 1); |
| ctx->num_param = 0; |
| } |
| } |
| } |
| |
| cf = next_exec_cf(ctx); |
| |
| /* TODO turn this into a table: */ |
| switch (opc) { |
| case TGSI_OPCODE_MOV: |
| instr = ir2_instr_create_alu(cf, MAXv, ~0); |
| add_regs_vector_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_RCP: |
| instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE); |
| add_regs_scalar_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_RSQ: |
| instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE); |
| add_regs_scalar_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_SQRT: |
| instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE); |
| add_regs_scalar_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_MUL: |
| instr = ir2_instr_create_alu(cf, MULv, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_ADD: |
| instr = ir2_instr_create_alu(cf, ADDv, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_DP3: |
| instr = ir2_instr_create_alu(cf, DOT3v, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_DP4: |
| instr = ir2_instr_create_alu(cf, DOT4v, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_MIN: |
| instr = ir2_instr_create_alu(cf, MINv, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_MAX: |
| instr = ir2_instr_create_alu(cf, MAXv, ~0); |
| add_regs_vector_2(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_SLT: |
| case TGSI_OPCODE_SGE: |
| translate_sge_slt(ctx, inst, opc); |
| break; |
| case TGSI_OPCODE_MAD: |
| instr = ir2_instr_create_alu(cf, MULADDv, ~0); |
| add_regs_vector_3(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_LRP: |
| translate_lrp(ctx, inst, opc); |
| break; |
| case TGSI_OPCODE_FRC: |
| instr = ir2_instr_create_alu(cf, FRACv, ~0); |
| add_regs_vector_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_FLR: |
| instr = ir2_instr_create_alu(cf, FLOORv, ~0); |
| add_regs_vector_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_EX2: |
| instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE); |
| add_regs_scalar_1(ctx, inst, instr); |
| break; |
| case TGSI_OPCODE_POW: |
| translate_pow(ctx, inst); |
| break; |
| case TGSI_OPCODE_COS: |
| case TGSI_OPCODE_SIN: |
| translate_trig(ctx, inst, opc); |
| break; |
| case TGSI_OPCODE_TEX: |
| case TGSI_OPCODE_TXP: |
| translate_tex(ctx, inst, opc); |
| break; |
| case TGSI_OPCODE_CMP: |
| instr = ir2_instr_create_alu(cf, CNDGTEv, ~0); |
| add_regs_vector_3(ctx, inst, instr); |
| // TODO this should be src0 if regs where in sane order.. |
| instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */ |
| break; |
| case TGSI_OPCODE_IF: |
| push_predicate(ctx, &inst->Src[0].Register); |
| ctx->so->ir->pred = IR2_PRED_EQ; |
| break; |
| case TGSI_OPCODE_ELSE: |
| ctx->so->ir->pred = IR2_PRED_NE; |
| /* not sure if this is required in all cases, but blob compiler |
| * won't combine EQ and NE in same CF: |
| */ |
| ctx->cf = NULL; |
| break; |
| case TGSI_OPCODE_ENDIF: |
| pop_predicate(ctx); |
| break; |
| case TGSI_OPCODE_F2I: |
| instr = ir2_instr_create_alu(cf, TRUNCv, ~0); |
| add_regs_vector_1(ctx, inst, instr); |
| break; |
| default: |
| DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); |
| tgsi_dump(ctx->so->tokens, 0); |
| assert(0); |
| break; |
| } |
| |
| /* internal temporaries are only valid for the duration of a single |
| * TGSI instruction: |
| */ |
| ctx->num_internal_temps = 0; |
| } |
| |
| static void |
| compile_instructions(struct fd2_compile_context *ctx) |
| { |
| while (!tgsi_parse_end_of_tokens(&ctx->parser)) { |
| tgsi_parse_token(&ctx->parser); |
| |
| switch (ctx->parser.FullToken.Token.Type) { |
| case TGSI_TOKEN_TYPE_INSTRUCTION: |
| translate_instruction(ctx, |
| &ctx->parser.FullToken.FullInstruction); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| ctx->cf->cf_type = EXEC_END; |
| } |
| |
| int |
| fd2_compile_shader(struct fd_program_stateobj *prog, |
| struct fd2_shader_stateobj *so) |
| { |
| struct fd2_compile_context ctx; |
| |
| ir2_shader_destroy(so->ir); |
| so->ir = ir2_shader_create(); |
| so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; |
| |
| if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) |
| return -1; |
| |
| if (ctx.type == PIPE_SHADER_VERTEX) { |
| compile_vtx_fetch(&ctx); |
| } else if (ctx.type == PIPE_SHADER_FRAGMENT) { |
| prog->num_exports = 0; |
| memset(prog->export_linkage, 0xff, |
| sizeof(prog->export_linkage)); |
| } |
| |
| compile_instructions(&ctx); |
| |
| compile_free(&ctx); |
| |
| return 0; |
| } |
| |