| /************************************************************************** |
| * |
| * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| /** |
 * TGSI interpreter/executor.
| * |
| * Flow control information: |
| * |
| * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) |
| * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special |
| * care since a condition may be true for some quad components but false |
| * for other components. |
| * |
| * We basically execute all statements (even if they're in the part of |
| * an IF/ELSE clause that's "not taken") and use a special mask to |
| * control writing to destination registers. This is the ExecMask. |
| * See store_dest(). |
| * |
| * The ExecMask is computed from three other masks (CondMask, LoopMask and |
| * ContMask) which are controlled by the flow control instructions (namely: |
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
| * |
| * |
| * Authors: |
| * Michal Krol |
| * Brian Paul |
| */ |
| |
| #include <transpose_matrix4x4.h> |
| #include <simdmath/ceilf4.h> |
| #include <simdmath/cosf4.h> |
| #include <simdmath/divf4.h> |
| #include <simdmath/floorf4.h> |
| #include <simdmath/log2f4.h> |
| #include <simdmath/powf4.h> |
| #include <simdmath/sinf4.h> |
| #include <simdmath/sqrtf4.h> |
| #include <simdmath/truncf4.h> |
| |
| #include "pipe/p_compiler.h" |
| #include "pipe/p_state.h" |
| #include "pipe/p_shader_tokens.h" |
| #include "tgsi/tgsi_parse.h" |
| #include "tgsi/tgsi_util.h" |
| #include "spu_exec.h" |
| #include "spu_main.h" |
| #include "spu_vertex_shader.h" |
| #include "spu_dcache.h" |
| #include "cell/common.h" |
| |
/* Position of each pixel within a 2x2 quad (also the tile-relative order
 * used by the derivative shuffle patterns below).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0

/* Iterate CHAN over the four vector components X, Y, Z, W */
#define FOR_EACH_CHANNEL(CHAN)\
   for (CHAN = 0; CHAN < 4; CHAN++)

/* Test the write mask of instruction INST's first (second) dst operand */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/* Iterate CHAN over only the components enabled in the dst write mask */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/** The execution mask depends on the conditional mask and the loop mask */
#define UPDATE_EXEC_MASK(MACH) \
      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask

/* Component indices within a 4-element vector */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
| |
| |
| |
| /** |
| * Initialize machine state by expanding tokens to full instructions, |
| * allocating temporary storage, setting up constants, etc. |
| * After this, we can call spu_exec_machine_run() many times. |
| */ |
| void |
| spu_exec_machine_init(struct spu_exec_machine *mach, |
| uint numSamplers, |
| struct spu_sampler *samplers, |
| unsigned processor) |
| { |
| const qword zero = si_il(0); |
| const qword not_zero = si_il(~0); |
| |
| (void) numSamplers; |
| mach->Samplers = samplers; |
| mach->Processor = processor; |
| mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; |
| |
| /* Setup constants. */ |
| mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; |
| mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; |
| mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); |
| mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); |
| |
| mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); |
| mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); |
| mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); |
| mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); |
| } |
| |
| |
| static INLINE qword |
| micro_abs(qword src) |
| { |
| return si_rotmi(si_shli(src, 1), -1); |
| } |
| |
| static INLINE qword |
| micro_ceil(qword src) |
| { |
| return (qword) _ceilf4((vec_float4) src); |
| } |
| |
| static INLINE qword |
| micro_cos(qword src) |
| { |
| return (qword) _cosf4((vec_float4) src); |
| } |
| |
/* si_shufb control words that broadcast one pixel of the quad to all four
 * word slots.  Each group of four byte indices selects the 4 bytes of the
 * TILE_* element; repeating the group four times replicates that pixel's
 * value across the whole register.  Used by the DDX/DDY helpers below.
 */
static const qword br_shuf = {
   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
};

static const qword bl_shuf = {
   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
};

static const qword tl_shuf = {
   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
};
| |
| static qword |
| micro_ddx(qword src) |
| { |
| qword bottom_right = si_shufb(src, src, br_shuf); |
| qword bottom_left = si_shufb(src, src, bl_shuf); |
| |
| return si_fs(bottom_right, bottom_left); |
| } |
| |
| static qword |
| micro_ddy(qword src) |
| { |
| qword top_left = si_shufb(src, src, tl_shuf); |
| qword bottom_left = si_shufb(src, src, bl_shuf); |
| |
| return si_fs(top_left, bottom_left); |
| } |
| |
| static INLINE qword |
| micro_div(qword src0, qword src1) |
| { |
| return (qword) _divf4((vec_float4) src0, (vec_float4) src1); |
| } |
| |
| static qword |
| micro_flr(qword src) |
| { |
| return (qword) _floorf4((vec_float4) src); |
| } |
| |
| static qword |
| micro_frc(qword src) |
| { |
| return si_fs(src, (qword) _floorf4((vec_float4) src)); |
| } |
| |
| static INLINE qword |
| micro_ge(qword src0, qword src1) |
| { |
| return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); |
| } |
| |
| static qword |
| micro_lg2(qword src) |
| { |
| return (qword) _log2f4((vec_float4) src); |
| } |
| |
| static INLINE qword |
| micro_lt(qword src0, qword src1) |
| { |
| const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); |
| |
| return si_xori(tmp, 0xff); |
| } |
| |
| static INLINE qword |
| micro_max(qword src0, qword src1) |
| { |
| return si_selb(src1, src0, si_fcgt(src0, src1)); |
| } |
| |
| static INLINE qword |
| micro_min(qword src0, qword src1) |
| { |
| return si_selb(src0, src1, si_fcgt(src0, src1)); |
| } |
| |
| static qword |
| micro_neg(qword src) |
| { |
| return si_xor(src, (qword) spu_splats(0x80000000)); |
| } |
| |
| static qword |
| micro_set_sign(qword src) |
| { |
| return si_or(src, (qword) spu_splats(0x80000000)); |
| } |
| |
| static qword |
| micro_pow(qword src0, qword src1) |
| { |
| return (qword) _powf4((vec_float4) src0, (vec_float4) src1); |
| } |
| |
| static qword |
| micro_rnd(qword src) |
| { |
| const qword half = (qword) spu_splats(0.5f); |
| |
| /* May be able to use _roundf4. There may be some difference, though. |
| */ |
| return (qword) _floorf4((vec_float4) si_fa(src, half)); |
| } |
| |
| static INLINE qword |
| micro_ishr(qword src0, qword src1) |
| { |
| return si_rotma(src0, si_sfi(src1, 0)); |
| } |
| |
| static qword |
| micro_trunc(qword src) |
| { |
| return (qword) _truncf4((vec_float4) src); |
| } |
| |
| static qword |
| micro_sin(qword src) |
| { |
| return (qword) _sinf4((vec_float4) src); |
| } |
| |
| static INLINE qword |
| micro_sqrt(qword src) |
| { |
| return (qword) _sqrtf4((vec_float4) src); |
| } |
| |
/**
 * Fetch one channel (four values, one per quad element) from a register file.
 *
 * \param mach     the executor's machine state
 * \param file     source register file (TGSI_FILE_x)
 * \param swizzle  which component (TGSI_SWIZZLE_x) to read
 * \param index    four register indices, one per quad element (they can
 *                 differ when indirect addressing is in effect)
 * \param chan     receives the four fetched values
 */
static void
fetch_src_file_channel(
   const struct spu_exec_machine *mach,
   const uint file,
   const uint swizzle,
   const union spu_exec_channel *index,
   union spu_exec_channel *chan )
{
   switch( swizzle ) {
   case TGSI_SWIZZLE_X:
   case TGSI_SWIZZLE_Y:
   case TGSI_SWIZZLE_Z:
   case TGSI_SWIZZLE_W:
      switch( file ) {
      case TGSI_FILE_CONSTANT: {
         unsigned i;

         /* Constants reside in main memory: pull each float through the
          * software-managed data cache, one element at a time.
          */
         for (i = 0; i < 4; i++) {
            const float *ptr = mach->Consts[index->i[i]];
            float tmp[4];

            spu_dcache_fetch_unaligned((qword *) tmp,
                                       (uintptr_t)(ptr + swizzle),
                                       sizeof(float));

            chan->f[i] = tmp[0];
         }
         break;
      }

      case TGSI_FILE_INPUT:
         /* NOTE(review): each quad element i reads lane i of the swizzled
          * input vector (u[0]..u[3]) — assumes inputs are stored
          * one-value-per-lane for the quad; confirm against spu_exec.h.
          */
         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
         break;

      case TGSI_FILE_TEMPORARY:
         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
         break;

      case TGSI_FILE_IMMEDIATE:
         ASSERT( index->i[0] < (int) mach->ImmLimit );
         ASSERT( index->i[1] < (int) mach->ImmLimit );
         ASSERT( index->i[2] < (int) mach->ImmLimit );
         ASSERT( index->i[3] < (int) mach->ImmLimit );

         chan->f[0] = mach->Imms[index->i[0]][swizzle];
         chan->f[1] = mach->Imms[index->i[1]][swizzle];
         chan->f[2] = mach->Imms[index->i[2]][swizzle];
         chan->f[3] = mach->Imms[index->i[3]][swizzle];
         break;

      case TGSI_FILE_ADDRESS:
         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
         break;

      case TGSI_FILE_OUTPUT:
         /* vertex/fragment output vars can be read too */
         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
         break;

      default:
         ASSERT( 0 );
      }
      break;

   default:
      ASSERT( 0 );
   }
}
| |
/**
 * Fetch the value of one channel of a source operand, resolving indirect
 * and two-dimensional addressing, then apply the operand's sign mode and
 * complement modifier.
 *
 * \param mach        the executor's machine state
 * \param chan        receives the fetched (and modified) values
 * \param reg         the full source register description
 * \param chan_index  which destination channel is being computed (used to
 *                    look up the operand's swizzle)
 */
static void
fetch_source(
   const struct spu_exec_machine *mach,
   union spu_exec_channel *chan,
   const struct tgsi_full_src_register *reg,
   const uint chan_index )
{
   union spu_exec_channel index;
   uint swizzle;

   /* Start with the same register index for all four quad elements. */
   index.i[0] =
   index.i[1] =
   index.i[2] =
   index.i[3] = reg->Register.Index;

   if (reg->Register.Indirect) {
      union spu_exec_channel index2;
      union spu_exec_channel indir_index;

      index2.i[0] =
      index2.i[1] =
      index2.i[2] =
      index2.i[3] = reg->Indirect.Index;

      /* Fetch the address register's X channel and add it to the base
       * index; the four quad elements may then address different registers.
       */
      swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
                                                   CHAN_X);
      fetch_src_file_channel(
         mach,
         reg->Indirect.File,
         swizzle,
         &index2,
         &indir_index );

      index.q = si_a(index.q, indir_index.q);
   }

   if( reg->Register.Dimension ) {
      /* Two-dimensional operand: scale the index by the row size of the
       * file before adding the second-dimension offset.
       */
      switch( reg->Register.File ) {
      case TGSI_FILE_INPUT:
         /* scale by 17 — presumably the per-primitive input stride;
          * TODO confirm against the vertex fetch layout
          */
         index.q = si_mpyi(index.q, 17);
         break;
      case TGSI_FILE_CONSTANT:
         /* scale by 4096 — presumably the constant-buffer stride;
          * TODO confirm against spu_main's constant layout
          */
         index.q = si_shli(index.q, 12);
         break;
      default:
         ASSERT( 0 );
      }

      index.i[0] += reg->Dimension.Index;
      index.i[1] += reg->Dimension.Index;
      index.i[2] += reg->Dimension.Index;
      index.i[3] += reg->Dimension.Index;

      if (reg->Dimension.Indirect) {
         union spu_exec_channel index2;
         union spu_exec_channel indir_index;

         index2.i[0] =
         index2.i[1] =
         index2.i[2] =
         index2.i[3] = reg->DimIndirect.Index;

         /* Indirect second dimension: add the address register value. */
         swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
         fetch_src_file_channel(
            mach,
            reg->DimIndirect.File,
            swizzle,
            &index2,
            &indir_index );

         index.q = si_a(index.q, indir_index.q);
      }
   }

   /* Do the actual fetch through the operand's swizzle for this channel. */
   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
   fetch_src_file_channel(
      mach,
      reg->Register.File,
      swizzle,
      &index,
      chan );

   /* Apply the operand's sign mode: |x|, -|x|, -x, or unchanged. */
   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
   case TGSI_UTIL_SIGN_CLEAR:
      chan->q = micro_abs(chan->q);
      break;

   case TGSI_UTIL_SIGN_SET:
      chan->q = micro_set_sign(chan->q);
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      chan->q = micro_neg(chan->q);
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   /* Complement modifier: x -> 1 - x */
   if (reg->RegisterExtMod.Complement) {
      chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
   }
}
| |
/**
 * Store a channel's worth of results into a destination register,
 * honoring the current ExecMask and the instruction's saturation mode.
 *
 * \param mach        the executor's machine state
 * \param chan        the four values to store
 * \param reg         the full destination register description
 * \param inst        the instruction (for its saturation mode)
 * \param chan_index  which channel (X/Y/Z/W) of the dest register to write
 */
static void
store_dest(
   struct spu_exec_machine *mach,
   const union spu_exec_channel *chan,
   const struct tgsi_full_dst_register *reg,
   const struct tgsi_full_instruction *inst,
   uint chan_index )
{
   union spu_exec_channel *dst;

   switch( reg->Register.File ) {
   case TGSI_FILE_NULL:
      /* Writes to the null file are discarded. */
      return;

   case TGSI_FILE_OUTPUT:
      /* TEMP_OUTPUT holds the base offset of the current vertex/fragment
       * within the output array.
       */
      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
                           + reg->Register.Index].xyzw[chan_index];
      break;

   case TGSI_FILE_TEMPORARY:
      dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
      break;

   case TGSI_FILE_ADDRESS:
      dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
      break;

   default:
      ASSERT( 0 );
      return;
   }

   switch (inst->Instruction.Saturate)
   {
   case TGSI_SAT_NONE:
      /* Write only the quad elements enabled in the execution mask, so
       * inactive pixels/vertices (e.g. in untaken IF branches) keep
       * their old values.
       */
      if (mach->ExecMask & 0x1)
         dst->i[0] = chan->i[0];
      if (mach->ExecMask & 0x2)
         dst->i[1] = chan->i[1];
      if (mach->ExecMask & 0x4)
         dst->i[2] = chan->i[2];
      if (mach->ExecMask & 0x8)
         dst->i[3] = chan->i[3];
      break;

   case TGSI_SAT_ZERO_ONE:
      /* XXX need to obey ExecMask here */
      dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
      dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* [-1,1] saturation is not implemented on the SPU path. */
      ASSERT( 0 );
      break;

   default:
      ASSERT( 0 );
   }
}
| |
/** Fetch channel CHAN of source operand INDEX into VAL (expects locals
 *  named `mach` and `inst` in the enclosing scope).
 */
#define FETCH(VAL,INDEX,CHAN)\
   fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)

/** Store VAL to channel CHAN of destination operand INDEX (same locals). */
#define STORE(VAL,INDEX,CHAN)\
   store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
| |
| |
| /** |
| * Execute ARB-style KIL which is predicated by a src register. |
| * Kill fragment if any of the four values is less than zero. |
| */ |
| static void |
| exec_kil(struct spu_exec_machine *mach, |
| const struct tgsi_full_instruction *inst) |
| { |
| uint uniquemask; |
| uint chan_index; |
| uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
| union spu_exec_channel r[1]; |
| |
| /* This mask stores component bits that were already tested. */ |
| uniquemask = 0; |
| |
| for (chan_index = 0; chan_index < 4; chan_index++) |
| { |
| uint swizzle; |
| uint i; |
| |
| /* unswizzle channel */ |
| swizzle = tgsi_util_get_full_src_register_swizzle ( |
| &inst->Src[0], |
| chan_index); |
| |
| /* check if the component has not been already tested */ |
| if (uniquemask & (1 << swizzle)) |
| continue; |
| uniquemask |= 1 << swizzle; |
| |
| FETCH(&r[0], 0, chan_index); |
| for (i = 0; i < 4; i++) |
| if (r[0].f[i] < 0.0f) |
| kilmask |= 1 << i; |
| } |
| |
| mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
| } |
| |
| /** |
| * Execute NVIDIA-style KIL which is predicated by a condition code. |
| * Kill fragment if the condition code is TRUE. |
| */ |
| static void |
| exec_kilp(struct tgsi_exec_machine *mach, |
| const struct tgsi_full_instruction *inst) |
| { |
| uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
| |
| /* TODO: build kilmask from CC mask */ |
| |
| mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
| } |
| |
| /* |
| * Fetch a texel using STR texture coordinates. |
| */ |
| static void |
| fetch_texel( struct spu_sampler *sampler, |
| const union spu_exec_channel *s, |
| const union spu_exec_channel *t, |
| const union spu_exec_channel *p, |
| float lodbias, /* XXX should be float[4] */ |
| union spu_exec_channel *r, |
| union spu_exec_channel *g, |
| union spu_exec_channel *b, |
| union spu_exec_channel *a ) |
| { |
| qword rgba[4]; |
| qword out[4]; |
| |
| sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, |
| (float (*)[4]) rgba); |
| |
| _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); |
| r->q = out[0]; |
| g->q = out[1]; |
| b->q = out[2]; |
| a->q = out[3]; |
| } |
| |
| |
| static void |
| exec_tex(struct spu_exec_machine *mach, |
| const struct tgsi_full_instruction *inst, |
| boolean biasLod, boolean projected) |
| { |
| const uint unit = inst->Src[1].Register.Index; |
| union spu_exec_channel r[8]; |
| uint chan_index; |
| float lodBias; |
| |
| /* printf("Sampler %u unit %u\n", sampler, unit); */ |
| |
| switch (inst->InstructionExtTexture.Texture) { |
| case TGSI_TEXTURE_1D: |
| |
| FETCH(&r[0], 0, CHAN_X); |
| |
| if (projected) { |
| FETCH(&r[1], 0, CHAN_W); |
| r[0].q = micro_div(r[0].q, r[1].q); |
| } |
| |
| if (biasLod) { |
| FETCH(&r[1], 0, CHAN_W); |
| lodBias = r[2].f[0]; |
| } |
| else |
| lodBias = 0.0; |
| |
| fetch_texel(&mach->Samplers[unit], |
| &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ |
| &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
| break; |
| |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| |
| FETCH(&r[0], 0, CHAN_X); |
| FETCH(&r[1], 0, CHAN_Y); |
| FETCH(&r[2], 0, CHAN_Z); |
| |
| if (projected) { |
| FETCH(&r[3], 0, CHAN_W); |
| r[0].q = micro_div(r[0].q, r[3].q); |
| r[1].q = micro_div(r[1].q, r[3].q); |
| r[2].q = micro_div(r[2].q, r[3].q); |
| } |
| |
| if (biasLod) { |
| FETCH(&r[3], 0, CHAN_W); |
| lodBias = r[3].f[0]; |
| } |
| else |
| lodBias = 0.0; |
| |
| fetch_texel(&mach->Samplers[unit], |
| &r[0], &r[1], &r[2], lodBias, /* inputs */ |
| &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
| break; |
| |
| case TGSI_TEXTURE_3D: |
| case TGSI_TEXTURE_CUBE: |
| |
| FETCH(&r[0], 0, CHAN_X); |
| FETCH(&r[1], 0, CHAN_Y); |
| FETCH(&r[2], 0, CHAN_Z); |
| |
| if (projected) { |
| FETCH(&r[3], 0, CHAN_W); |
| r[0].q = micro_div(r[0].q, r[3].q); |
| r[1].q = micro_div(r[1].q, r[3].q); |
| r[2].q = micro_div(r[2].q, r[3].q); |
| } |
| |
| if (biasLod) { |
| FETCH(&r[3], 0, CHAN_W); |
| lodBias = r[3].f[0]; |
| } |
| else |
| lodBias = 0.0; |
| |
| fetch_texel(&mach->Samplers[unit], |
| &r[0], &r[1], &r[2], lodBias, |
| &r[0], &r[1], &r[2], &r[3]); |
| break; |
| |
| default: |
| ASSERT (0); |
| } |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[chan_index], 0, chan_index ); |
| } |
| } |
| |
| |
| |
| static void |
| constant_interpolation( |
| struct spu_exec_machine *mach, |
| unsigned attrib, |
| unsigned chan ) |
| { |
| unsigned i; |
| |
| for( i = 0; i < QUAD_SIZE; i++ ) { |
| mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; |
| } |
| } |
| |
| static void |
| linear_interpolation( |
| struct spu_exec_machine *mach, |
| unsigned attrib, |
| unsigned chan ) |
| { |
| const float x = mach->QuadPos.xyzw[0].f[0]; |
| const float y = mach->QuadPos.xyzw[1].f[0]; |
| const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
| const float dady = mach->InterpCoefs[attrib].dady[chan]; |
| const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
| mach->Inputs[attrib].xyzw[chan].f[0] = a0; |
| mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; |
| mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; |
| mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; |
| } |
| |
| static void |
| perspective_interpolation( |
| struct spu_exec_machine *mach, |
| unsigned attrib, |
| unsigned chan ) |
| { |
| const float x = mach->QuadPos.xyzw[0].f[0]; |
| const float y = mach->QuadPos.xyzw[1].f[0]; |
| const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
| const float dady = mach->InterpCoefs[attrib].dady[chan]; |
| const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
| const float *w = mach->QuadPos.xyzw[3].f; |
| /* divide by W here */ |
| mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; |
| mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; |
| mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; |
| mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; |
| } |
| |
| |
/** Signature shared by the three attribute interpolation routines above. */
typedef void (* interpolation_func)(
   struct spu_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
| |
| static void |
| exec_declaration(struct spu_exec_machine *mach, |
| const struct tgsi_full_declaration *decl) |
| { |
| if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { |
| if( decl->Declaration.File == TGSI_FILE_INPUT ) { |
| unsigned first, last, mask; |
| interpolation_func interp; |
| |
| first = decl->Range.First; |
| last = decl->Range.Last; |
| mask = decl->Declaration.UsageMask; |
| |
| switch( decl->Declaration.Interpolate ) { |
| case TGSI_INTERPOLATE_CONSTANT: |
| interp = constant_interpolation; |
| break; |
| |
| case TGSI_INTERPOLATE_LINEAR: |
| interp = linear_interpolation; |
| break; |
| |
| case TGSI_INTERPOLATE_PERSPECTIVE: |
| interp = perspective_interpolation; |
| break; |
| |
| default: |
| ASSERT( 0 ); |
| } |
| |
| if( mask == TGSI_WRITEMASK_XYZW ) { |
| unsigned i, j; |
| |
| for( i = first; i <= last; i++ ) { |
| for( j = 0; j < NUM_CHANNELS; j++ ) { |
| interp( mach, i, j ); |
| } |
| } |
| } |
| else { |
| unsigned i, j; |
| |
| for( j = 0; j < NUM_CHANNELS; j++ ) { |
| if( mask & (1 << j) ) { |
| for( i = first; i <= last; i++ ) { |
| interp( mach, i, j ); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| static void |
| exec_instruction( |
| struct spu_exec_machine *mach, |
| const struct tgsi_full_instruction *inst, |
| int *pc ) |
| { |
| uint chan_index; |
| union spu_exec_channel r[8]; |
| |
| (*pc)++; |
| |
| switch (inst->Instruction.Opcode) { |
| case TGSI_OPCODE_ARL: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = si_cflts(r[0].q, 0); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_MOV: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_LIT: |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { |
| STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { |
| FETCH( &r[0], 0, CHAN_X ); |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { |
| r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); |
| STORE( &r[0], 0, CHAN_Y ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { |
| FETCH( &r[1], 0, CHAN_Y ); |
| r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); |
| |
| FETCH( &r[2], 0, CHAN_W ); |
| r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); |
| r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); |
| r[1].q = micro_pow(r[1].q, r[2].q); |
| |
| /* r0 = (r0 > 0.0) ? r1 : 0.0 |
| */ |
| r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); |
| r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, |
| r[0].q); |
| STORE( &r[0], 0, CHAN_Z ); |
| } |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { |
| STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); |
| } |
| break; |
| |
| case TGSI_OPCODE_RCP: |
| FETCH( &r[0], 0, CHAN_X ); |
| r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_RSQ: |
| FETCH( &r[0], 0, CHAN_X ); |
| r[0].q = micro_sqrt(r[0].q); |
| r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_EXP: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_LOG: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_MUL: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) |
| { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| |
| r[0].q = si_fm(r[0].q, r[1].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_ADD: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = si_fa(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_DP3: |
| /* TGSI_OPCODE_DOT3 */ |
| FETCH( &r[0], 0, CHAN_X ); |
| FETCH( &r[1], 1, CHAN_X ); |
| r[0].q = si_fm(r[0].q, r[1].q); |
| |
| FETCH( &r[1], 0, CHAN_Y ); |
| FETCH( &r[2], 1, CHAN_Y ); |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| |
| FETCH( &r[1], 0, CHAN_Z ); |
| FETCH( &r[2], 1, CHAN_Z ); |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_DP4: |
| /* TGSI_OPCODE_DOT4 */ |
| FETCH(&r[0], 0, CHAN_X); |
| FETCH(&r[1], 1, CHAN_X); |
| |
| r[0].q = si_fm(r[0].q, r[1].q); |
| |
| FETCH(&r[1], 0, CHAN_Y); |
| FETCH(&r[2], 1, CHAN_Y); |
| |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FETCH(&r[1], 0, CHAN_Z); |
| FETCH(&r[2], 1, CHAN_Z); |
| |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FETCH(&r[1], 0, CHAN_W); |
| FETCH(&r[2], 1, CHAN_W); |
| |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_DST: |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { |
| STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { |
| FETCH( &r[0], 0, CHAN_Y ); |
| FETCH( &r[1], 1, CHAN_Y); |
| r[0].q = si_fm(r[0].q, r[1].q); |
| STORE( &r[0], 0, CHAN_Y ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { |
| FETCH( &r[0], 0, CHAN_Z ); |
| STORE( &r[0], 0, CHAN_Z ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { |
| FETCH( &r[0], 1, CHAN_W ); |
| STORE( &r[0], 0, CHAN_W ); |
| } |
| break; |
| |
| case TGSI_OPCODE_MIN: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| |
| r[0].q = micro_min(r[0].q, r[1].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_MAX: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| |
| r[0].q = micro_max(r[0].q, r[1].q); |
| |
| STORE(&r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SLT: |
| /* TGSI_OPCODE_SETLT */ |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| |
| r[0].q = micro_ge(r[0].q, r[1].q); |
| r[0].q = si_xori(r[0].q, 0xff); |
| |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SGE: |
| /* TGSI_OPCODE_SETGE */ |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = micro_ge(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_MAD: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| FETCH( &r[2], 2, chan_index ); |
| r[0].q = si_fma(r[0].q, r[1].q, r[2].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SUB: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| |
| r[0].q = si_fs(r[0].q, r[1].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_LRP: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| FETCH(&r[2], 2, chan_index); |
| |
| r[1].q = si_fs(r[1].q, r[2].q); |
| r[0].q = si_fma(r[0].q, r[1].q, r[2].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_CND: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_DP2A: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_FRC: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_frc(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_CLAMP: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_FLR: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_flr(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_ROUND: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_rnd(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_EX2: |
| FETCH(&r[0], 0, CHAN_X); |
| |
| r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_LG2: |
| FETCH( &r[0], 0, CHAN_X ); |
| r[0].q = micro_lg2(r[0].q); |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_POW: |
| FETCH(&r[0], 0, CHAN_X); |
| FETCH(&r[1], 1, CHAN_X); |
| |
| r[0].q = micro_pow(r[0].q, r[1].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_XPD: |
| /* TGSI_OPCODE_XPD */ |
| FETCH(&r[0], 0, CHAN_Y); |
| FETCH(&r[1], 1, CHAN_Z); |
| FETCH(&r[3], 0, CHAN_Z); |
| FETCH(&r[4], 1, CHAN_Y); |
| |
| /* r2 = (r0 * r1) - (r3 * r5) |
| */ |
| r[2].q = si_fm(r[3].q, r[5].q); |
| r[2].q = si_fms(r[0].q, r[1].q, r[2].q); |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { |
| STORE( &r[2], 0, CHAN_X ); |
| } |
| |
| FETCH(&r[2], 1, CHAN_X); |
| FETCH(&r[5], 0, CHAN_X); |
| |
| /* r3 = (r3 * r2) - (r1 * r5) |
| */ |
| r[1].q = si_fm(r[1].q, r[5].q); |
| r[3].q = si_fms(r[3].q, r[2].q, r[1].q); |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { |
| STORE( &r[3], 0, CHAN_Y ); |
| } |
| |
| /* r5 = (r5 * r4) - (r0 * r2) |
| */ |
| r[0].q = si_fm(r[0].q, r[2].q); |
| r[5].q = si_fms(r[5].q, r[4].q, r[0].q); |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { |
| STORE( &r[5], 0, CHAN_Z ); |
| } |
| |
| if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { |
| STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); |
| } |
| break; |
| |
| case TGSI_OPCODE_ABS: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| |
| r[0].q = micro_abs(r[0].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_RCC: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_DPH: |
| FETCH(&r[0], 0, CHAN_X); |
| FETCH(&r[1], 1, CHAN_X); |
| |
| r[0].q = si_fm(r[0].q, r[1].q); |
| |
| FETCH(&r[1], 0, CHAN_Y); |
| FETCH(&r[2], 1, CHAN_Y); |
| |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FETCH(&r[1], 0, CHAN_Z); |
| FETCH(&r[2], 1, CHAN_Z); |
| |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FETCH(&r[1], 1, CHAN_W); |
| |
| r[0].q = si_fa(r[0].q, r[1].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_COS: |
| FETCH(&r[0], 0, CHAN_X); |
| |
| r[0].q = micro_cos(r[0].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_DDX: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_ddx(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_DDY: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_ddy(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_KILP: |
| exec_kilp (mach, inst); |
| break; |
| |
| case TGSI_OPCODE_KIL: |
| exec_kil (mach, inst); |
| break; |
| |
| case TGSI_OPCODE_PK2H: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_PK2US: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_PK4B: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_PK4UB: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_RFL: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_SEQ: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| |
| r[0].q = si_fceq(r[0].q, r[1].q); |
| |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SFL: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_SGT: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = si_fcgt(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SIN: |
| FETCH( &r[0], 0, CHAN_X ); |
| r[0].q = micro_sin(r[0].q); |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SLE: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| |
| r[0].q = si_fcgt(r[0].q, r[1].q); |
| r[0].q = si_xori(r[0].q, 0xff); |
| |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SNE: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| |
| r[0].q = si_fceq(r[0].q, r[1].q); |
| r[0].q = si_xori(r[0].q, 0xff); |
| |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_STR: |
| ASSERT (0); |
| break; |
| |
   case TGSI_OPCODE_TEX:
      /* simple texture lookup */
      /* src[0] = texcoord */
      /* src[1] = sampler unit */
      exec_tex(mach, inst, FALSE, FALSE);
      break;

   case TGSI_OPCODE_TXB:
      /* Texture lookup with lod bias */
      /* src[0] = texcoord (src[0].w = lod bias) */
      /* src[1] = sampler unit */
      exec_tex(mach, inst, TRUE, FALSE);
      break;

   case TGSI_OPCODE_TXD:
      /* Texture lookup with explicit partial derivatives */
      /* src[0] = texcoord */
      /* src[1] = d[strq]/dx */
      /* src[2] = d[strq]/dy */
      /* src[3] = sampler unit */
      /* not implemented for SPU */
      ASSERT (0);
      break;

   case TGSI_OPCODE_TXL:
      /* Texture lookup with explicit LOD */
      /* src[0] = texcoord (src[0].w = LOD) */
      /* src[1] = sampler unit */
      /* NOTE(review): passes the same flags as TXB — presumably
       * exec_tex treats .w as a bias rather than an absolute LOD
       * here; confirm against exec_tex. */
      exec_tex(mach, inst, TRUE, FALSE);
      break;

   case TGSI_OPCODE_TXP:
      /* Texture lookup with projection */
      /* src[0] = texcoord (src[0].w = projection divisor) */
      /* src[1] = sampler unit */
      exec_tex(mach, inst, TRUE, TRUE);
      break;
| |
| case TGSI_OPCODE_UP2H: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_UP2US: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_UP4B: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_UP4UB: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_X2D: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_ARA: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_ARR: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_BRA: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_CAL: |
| /* skip the call if no execution channels are enabled */ |
| if (mach->ExecMask) { |
| /* do the call */ |
| |
| /* push the Cond, Loop, Cont stacks */ |
| ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
| mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
| ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
| mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
| ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
| mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
| |
| ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); |
| mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; |
| |
| /* note that PC was already incremented above */ |
| mach->CallStack[mach->CallStackTop++] = *pc; |
| *pc = inst->InstructionExtLabel.Label; |
| } |
| break; |
| |
   case TGSI_OPCODE_RET:
      /* Disable the returning channels; the actual return (call-stack
       * pop) only happens once no channels remain active. */
      mach->FuncMask &= ~mach->ExecMask;
      UPDATE_EXEC_MASK(mach);

      if (mach->ExecMask == 0x0) {
         /* really return now (otherwise, keep executing) */

         if (mach->CallStackTop == 0) {
            /* returning from main() */
            *pc = -1;
            return;
         }
         *pc = mach->CallStack[--mach->CallStackTop];

         /* pop the Cond, Loop, Cont stacks (mirrors the pushes in CAL) */
         ASSERT(mach->CondStackTop > 0);
         mach->CondMask = mach->CondStack[--mach->CondStackTop];
         ASSERT(mach->LoopStackTop > 0);
         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
         ASSERT(mach->ContStackTop > 0);
         mach->ContMask = mach->ContStack[--mach->ContStackTop];
         ASSERT(mach->FuncStackTop > 0);
         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];

         UPDATE_EXEC_MASK(mach);
      }
      break;
| |
| case TGSI_OPCODE_SSG: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_CMP: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH(&r[0], 0, chan_index); |
| FETCH(&r[1], 1, chan_index); |
| FETCH(&r[2], 2, chan_index); |
| |
| /* r0 = (r0 < 0.0) ? r1 : r2 |
| */ |
| r[3].q = si_xor(r[3].q, r[3].q); |
| r[0].q = micro_lt(r[0].q, r[3].q); |
| r[0].q = si_selb(r[1].q, r[2].q, r[0].q); |
| |
| STORE(&r[0], 0, chan_index); |
| } |
| break; |
| |
| case TGSI_OPCODE_SCS: |
| if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { |
| FETCH( &r[0], 0, CHAN_X ); |
| } |
| if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { |
| r[1].q = micro_cos(r[0].q); |
| STORE( &r[1], 0, CHAN_X ); |
| } |
| if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { |
| r[1].q = micro_sin(r[0].q); |
| STORE( &r[1], 0, CHAN_Y ); |
| } |
| if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { |
| STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); |
| } |
| if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { |
| STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); |
| } |
| break; |
| |
| case TGSI_OPCODE_NRM: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_DIV: |
| ASSERT( 0 ); |
| break; |
| |
| case TGSI_OPCODE_DP2: |
| FETCH( &r[0], 0, CHAN_X ); |
| FETCH( &r[1], 1, CHAN_X ); |
| r[0].q = si_fm(r[0].q, r[1].q); |
| |
| FETCH( &r[1], 0, CHAN_Y ); |
| FETCH( &r[2], 1, CHAN_Y ); |
| r[0].q = si_fma(r[1].q, r[2].q, r[0].q); |
| |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
   case TGSI_OPCODE_IF:
      /* push CondMask */
      ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
      FETCH( &r[0], 0, CHAN_X );
      /* update CondMask: clear the bit of each quad component whose
       * condition (src0.x) is zero/false */
      if( ! r[0].u[0] ) {
         mach->CondMask &= ~0x1;
      }
      if( ! r[0].u[1] ) {
         mach->CondMask &= ~0x2;
      }
      if( ! r[0].u[2] ) {
         mach->CondMask &= ~0x4;
      }
      if( ! r[0].u[3] ) {
         mach->CondMask &= ~0x8;
      }
      UPDATE_EXEC_MASK(mach);
      /* Todo: If CondMask==0, jump to ELSE */
      break;
| |
| case TGSI_OPCODE_ELSE: |
| /* invert CondMask wrt previous mask */ |
| { |
| uint prevMask; |
| ASSERT(mach->CondStackTop > 0); |
| prevMask = mach->CondStack[mach->CondStackTop - 1]; |
| mach->CondMask = ~mach->CondMask & prevMask; |
| UPDATE_EXEC_MASK(mach); |
| /* Todo: If CondMask==0, jump to ENDIF */ |
| } |
| break; |
| |
| case TGSI_OPCODE_ENDIF: |
| /* pop CondMask */ |
| ASSERT(mach->CondStackTop > 0); |
| mach->CondMask = mach->CondStack[--mach->CondStackTop]; |
| UPDATE_EXEC_MASK(mach); |
| break; |
| |
| case TGSI_OPCODE_END: |
| /* halt execution */ |
| *pc = -1; |
| break; |
| |
| case TGSI_OPCODE_REP: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_ENDREP: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_PUSHA: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_POPA: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_CEIL: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_ceil(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_I2F: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = si_csflt(r[0].q, 0); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_NOT: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = si_xorbi(r[0].q, 0xff); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_TRUNC: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| r[0].q = micro_trunc(r[0].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SHL: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| |
| r[0].q = si_shl(r[0].q, r[1].q); |
| |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_ISHR: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = micro_ishr(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_AND: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = si_and(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_OR: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = si_or(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_MOD: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_XOR: |
| FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |
| FETCH( &r[0], 0, chan_index ); |
| FETCH( &r[1], 1, chan_index ); |
| r[0].q = si_xor(r[0].q, r[1].q); |
| STORE( &r[0], 0, chan_index ); |
| } |
| break; |
| |
| case TGSI_OPCODE_SAD: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_TXF: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_TXQ: |
| ASSERT (0); |
| break; |
| |
| case TGSI_OPCODE_EMIT: |
| mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; |
| mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; |
| break; |
| |
| case TGSI_OPCODE_ENDPRIM: |
| mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; |
| mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; |
| break; |
| |
| case TGSI_OPCODE_BGNFOR: |
| /* fall-through (for now) */ |
| case TGSI_OPCODE_BGNLOOP: |
| /* push LoopMask and ContMasks */ |
| ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
| mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
| ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
| mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
| break; |
| |
| case TGSI_OPCODE_ENDFOR: |
| /* fall-through (for now at least) */ |
| case TGSI_OPCODE_ENDLOOP: |
| /* Restore ContMask, but don't pop */ |
| ASSERT(mach->ContStackTop > 0); |
| mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; |
| if (mach->LoopMask) { |
| /* repeat loop: jump to instruction just past BGNLOOP */ |
| *pc = inst->InstructionExtLabel.Label + 1; |
| } |
| else { |
| /* exit loop: pop LoopMask */ |
| ASSERT(mach->LoopStackTop > 0); |
| mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; |
| /* pop ContMask */ |
| ASSERT(mach->ContStackTop > 0); |
| mach->ContMask = mach->ContStack[--mach->ContStackTop]; |
| } |
| UPDATE_EXEC_MASK(mach); |
| break; |
| |
| case TGSI_OPCODE_BRK: |
| /* turn off loop channels for each enabled exec channel */ |
| mach->LoopMask &= ~mach->ExecMask; |
| /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
| UPDATE_EXEC_MASK(mach); |
| break; |
| |
   case TGSI_OPCODE_CONT:
      /* turn off cont channels for each enabled exec channel */
      mach->ContMask &= ~mach->ExecMask;
      /* Todo: if mach->ContMask == 0, jump to end of loop */
      UPDATE_EXEC_MASK(mach);
      break;
| |
| case TGSI_OPCODE_BGNSUB: |
| /* no-op */ |
| break; |
| |
| case TGSI_OPCODE_ENDSUB: |
| /* no-op */ |
| break; |
| |
| case TGSI_OPCODE_NOP: |
| break; |
| |
| default: |
| ASSERT( 0 ); |
| } |
| } |
| |
| |
| /** |
| * Run TGSI interpreter. |
| * \return bitmask of "alive" quad components |
| */ |
| uint |
| spu_exec_machine_run( struct spu_exec_machine *mach ) |
| { |
| uint i; |
| int pc = 0; |
| |
| mach->CondMask = 0xf; |
| mach->LoopMask = 0xf; |
| mach->ContMask = 0xf; |
| mach->FuncMask = 0xf; |
| mach->ExecMask = 0xf; |
| |
| mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ |
| ASSERT(mach->CondStackTop == 0); |
| ASSERT(mach->LoopStackTop == 0); |
| ASSERT(mach->ContStackTop == 0); |
| ASSERT(mach->CallStackTop == 0); |
| |
| mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; |
| mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; |
| |
| if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { |
| mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; |
| mach->Primitives[0] = 0; |
| } |
| |
| |
| /* execute declarations (interpolants) */ |
| if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { |
| for (i = 0; i < mach->NumDeclarations; i++) { |
| union { |
| struct tgsi_full_declaration decl; |
| qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; |
| } d ALIGN16_ATTRIB; |
| unsigned ea = (unsigned) (mach->Declarations + pc); |
| |
| spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); |
| |
| exec_declaration( mach, &d.decl ); |
| } |
| } |
| |
| /* execute instructions, until pc is set to -1 */ |
| while (pc != -1) { |
| union { |
| struct tgsi_full_instruction inst; |
| qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; |
| } i ALIGN16_ATTRIB; |
| unsigned ea = (unsigned) (mach->Instructions + pc); |
| |
| spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); |
| exec_instruction( mach, & i.inst, &pc ); |
| } |
| |
| #if 0 |
| /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ |
| if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { |
| /* |
| * Scale back depth component. |
| */ |
| for (i = 0; i < 4; i++) |
| mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; |
| } |
| #endif |
| |
| return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; |
| } |
| |
| |