| /* |
| * Copyright © 2014 Broadcom |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include <stdbool.h> |
| #include "util/ralloc.h" |
| #include "vc4_qir.h" |
| #include "vc4_qpu.h" |
| |
/* Encodes the source mux selector `mux` into the instruction field
 * `muxfield`.  QPU_MUX_SMALL_IMM is substituted with QPU_MUX_B before
 * encoding.
 *
 * `mux` is parenthesized so that any expression can be passed safely; note
 * that it is still evaluated twice, so it must not have side effects.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,  \
                      muxfield)
| |
| static uint64_t |
| set_src_raddr(uint64_t inst, struct qpu_reg src) |
| { |
| if (src.mux == QPU_MUX_A) { |
| assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP || |
| QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr); |
| return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A); |
| } |
| |
| if (src.mux == QPU_MUX_B) { |
| assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || |
| QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) && |
| QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM); |
| return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B); |
| } |
| |
| if (src.mux == QPU_MUX_SMALL_IMM) { |
| if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { |
| assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); |
| } else { |
| inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM); |
| assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP); |
| } |
| return ((inst & ~QPU_RADDR_B_MASK) | |
| QPU_SET_FIELD(src.addr, QPU_RADDR_B)); |
| } |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_NOP() |
| { |
| uint64_t inst = 0; |
| |
| inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD); |
| inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL); |
| |
| /* Note: These field values are actually non-zero */ |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); |
| inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); |
| |
| return inst; |
| } |
| |
| static uint64_t |
| qpu_a_dst(struct qpu_reg dst) |
| { |
| uint64_t inst = 0; |
| |
| if (dst.mux <= QPU_MUX_R5) { |
| /* Translate the mux to the ACCn values. */ |
| inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD); |
| } else { |
| inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD); |
| if (dst.mux == QPU_MUX_B) |
| inst |= QPU_WS; |
| } |
| |
| return inst; |
| } |
| |
| static uint64_t |
| qpu_m_dst(struct qpu_reg dst) |
| { |
| uint64_t inst = 0; |
| |
| if (dst.mux <= QPU_MUX_R5) { |
| /* Translate the mux to the ACCn values. */ |
| inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL); |
| } else { |
| inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL); |
| if (dst.mux == QPU_MUX_A) |
| inst |= QPU_WS; |
| } |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) |
| { |
| uint64_t inst = 0; |
| |
| inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); |
| inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); |
| inst |= qpu_a_dst(dst); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); |
| inst |= QPU_MUX(src.mux, QPU_ADD_A); |
| inst |= QPU_MUX(src.mux, QPU_ADD_B); |
| inst = set_src_raddr(inst, src); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) |
| { |
| uint64_t inst = 0; |
| |
| inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); |
| inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); |
| inst |= qpu_m_dst(dst); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); |
| inst |= QPU_MUX(src.mux, QPU_MUL_A); |
| inst |= QPU_MUX(src.mux, QPU_MUL_B); |
| inst = set_src_raddr(inst, src); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) |
| { |
| uint64_t inst = 0; |
| |
| inst |= qpu_a_dst(dst); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); |
| inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG); |
| inst |= val; |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) |
| { |
| return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2, |
| QPU_LOAD_IMM_MODE); |
| } |
| |
| uint64_t |
| qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) |
| { |
| return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2, |
| QPU_LOAD_IMM_MODE); |
| } |
| |
| uint64_t |
| qpu_branch(uint32_t cond, uint32_t target) |
| { |
| uint64_t inst = 0; |
| |
| inst |= qpu_a_dst(qpu_ra(QPU_W_NOP)); |
| inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); |
| inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND); |
| inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG); |
| inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET); |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_a_alu2(enum qpu_op_add op, |
| struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) |
| { |
| uint64_t inst = 0; |
| |
| inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); |
| inst |= QPU_SET_FIELD(op, QPU_OP_ADD); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); |
| inst |= qpu_a_dst(dst); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); |
| inst |= QPU_MUX(src0.mux, QPU_ADD_A); |
| inst = set_src_raddr(inst, src0); |
| inst |= QPU_MUX(src1.mux, QPU_ADD_B); |
| inst = set_src_raddr(inst, src1); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_m_alu2(enum qpu_op_mul op, |
| struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) |
| { |
| uint64_t inst = 0; |
| |
| inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); |
| inst |= QPU_SET_FIELD(op, QPU_OP_MUL); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); |
| inst |= qpu_m_dst(dst); |
| inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); |
| inst |= QPU_MUX(src0.mux, QPU_MUL_A); |
| inst = set_src_raddr(inst, src0); |
| inst |= QPU_MUX(src1.mux, QPU_MUL_B); |
| inst = set_src_raddr(inst, src1); |
| inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); |
| |
| return inst; |
| } |
| |
| uint64_t |
| qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot) |
| { |
| uint64_t inst = 0; |
| inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0); |
| |
| inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG); |
| inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot, |
| QPU_SMALL_IMM); |
| |
| return inst; |
| } |
| |
/**
 * Merges the field selected by mask from instructions a and b into *merge.
 *
 * If a's field equals ignore, b's field is written to *merge; otherwise if
 * b's field equals ignore, a's field is written.  When neither field is the
 * ignore value, *merge is left untouched and the result says whether the two
 * fields agree.
 *
 * \return false only when both fields are non-ignore and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t a_field = a & mask;
        const uint64_t b_field = b & mask;

        /* Both sides use the field: they are compatible only if equal. */
        if (a_field != ignore && b_field != ignore)
                return a_field == b_field;

        /* At least one side does not care; take the other side's value. */
        const uint64_t chosen = (a_field == ignore) ? b_field : a_field;

        *merge = (*merge & ~mask) | chosen;

        return true;
}
| |
| int |
| qpu_num_sf_accesses(uint64_t inst) |
| { |
| int accesses = 0; |
| static const uint32_t specials[] = { |
| QPU_W_TLB_COLOR_MS, |
| QPU_W_TLB_COLOR_ALL, |
| QPU_W_TLB_Z, |
| QPU_W_TMU0_S, |
| QPU_W_TMU0_T, |
| QPU_W_TMU0_R, |
| QPU_W_TMU0_B, |
| QPU_W_TMU1_S, |
| QPU_W_TMU1_T, |
| QPU_W_TMU1_R, |
| QPU_W_TMU1_B, |
| QPU_W_SFU_RECIP, |
| QPU_W_SFU_RECIPSQRT, |
| QPU_W_SFU_EXP, |
| QPU_W_SFU_LOG, |
| }; |
| uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); |
| uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); |
| uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); |
| uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); |
| |
| for (int j = 0; j < ARRAY_SIZE(specials); j++) { |
| if (waddr_add == specials[j]) |
| accesses++; |
| if (waddr_mul == specials[j]) |
| accesses++; |
| } |
| |
| if (raddr_a == QPU_R_MUTEX_ACQUIRE) |
| accesses++; |
| if (raddr_b == QPU_R_MUTEX_ACQUIRE && |
| QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) |
| accesses++; |
| |
| /* XXX: semaphore, combined color read/write? */ |
| switch (QPU_GET_FIELD(inst, QPU_SIG)) { |
| case QPU_SIG_COLOR_LOAD: |
| case QPU_SIG_COLOR_LOAD_END: |
| case QPU_SIG_LOAD_TMU0: |
| case QPU_SIG_LOAD_TMU1: |
| accesses++; |
| } |
| |
| return accesses; |
| } |
| |
| static bool |
| qpu_waddr_ignores_ws(uint32_t waddr) |
| { |
| switch(waddr) { |
| case QPU_W_ACC0: |
| case QPU_W_ACC1: |
| case QPU_W_ACC2: |
| case QPU_W_ACC3: |
| case QPU_W_NOP: |
| case QPU_W_TLB_Z: |
| case QPU_W_TLB_COLOR_MS: |
| case QPU_W_TLB_COLOR_ALL: |
| case QPU_W_TLB_ALPHA_MASK: |
| case QPU_W_VPM: |
| case QPU_W_SFU_RECIP: |
| case QPU_W_SFU_RECIPSQRT: |
| case QPU_W_SFU_EXP: |
| case QPU_W_SFU_LOG: |
| case QPU_W_TMU0_S: |
| case QPU_W_TMU0_T: |
| case QPU_W_TMU0_R: |
| case QPU_W_TMU0_B: |
| case QPU_W_TMU1_S: |
| case QPU_W_TMU1_T: |
| case QPU_W_TMU1_R: |
| case QPU_W_TMU1_B: |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static void |
| swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift) |
| { |
| uint64_t mux_mask = (uint64_t)0x7 << mux_shift; |
| uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift; |
| uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift; |
| |
| if ((*a & mux_mask) == mux_a_val) { |
| *a = (*a & ~mux_mask) | mux_b_val; |
| *merge = (*merge & ~mux_mask) | mux_b_val; |
| } |
| } |
| |
| static bool |
| try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) |
| { |
| uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A); |
| uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B); |
| uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A); |
| uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B); |
| |
| if (raddr_a_b != QPU_R_NOP) |
| return false; |
| |
| switch (raddr_a_a) { |
| case QPU_R_UNIF: |
| case QPU_R_VARY: |
| break; |
| default: |
| return false; |
| } |
| |
| if (!(*merge & QPU_PM) && |
| QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) { |
| return false; |
| } |
| |
| if (raddr_b_b != QPU_R_NOP && |
| raddr_b_b != raddr_a_a) |
| return false; |
| |
| /* Move raddr A to B in instruction a. */ |
| *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); |
| *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B); |
| *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A); |
| *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B); |
| swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT); |
| swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT); |
| swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT); |
| swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT); |
| |
| return true; |
| } |
| |
| static bool |
| convert_mov(uint64_t *inst) |
| { |
| uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A); |
| uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD); |
| uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD); |
| |
| /* Is it a MOV? */ |
| if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR || |
| (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) { |
| return false; |
| } |
| |
| if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE) |
| return false; |
| |
| /* We could maybe support this in the .8888 and .8a-.8d cases. */ |
| if (*inst & QPU_PM) |
| return false; |
| |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD); |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL); |
| |
| *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A); |
| *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B); |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A); |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B); |
| |
| *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL); |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD); |
| |
| *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL); |
| *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD); |
| |
| if (!qpu_waddr_ignores_ws(waddr_add)) |
| *inst ^= QPU_WS; |
| |
| return true; |
| } |
| |
| static bool |
| writes_a_file(uint64_t inst) |
| { |
| if (!(inst & QPU_WS)) |
| return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; |
| else |
| return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32; |
| } |
| |
| static bool |
| reads_r4(uint64_t inst) |
| { |
| return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || |
| QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || |
| QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || |
| QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); |
| } |
| |
| uint64_t |
| qpu_merge_inst(uint64_t a, uint64_t b) |
| { |
| uint64_t merge = a | b; |
| bool ok = true; |
| uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG); |
| uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG); |
| |
| if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP && |
| QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) { |
| if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP || |
| QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP || |
| !(convert_mov(&a) || convert_mov(&b))) { |
| return 0; |
| } else { |
| merge = a | b; |
| } |
| } |
| |
| if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP && |
| QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) |
| return 0; |
| |
| if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) |
| return 0; |
| |
| if (a_sig == QPU_SIG_LOAD_IMM || |
| b_sig == QPU_SIG_LOAD_IMM || |
| a_sig == QPU_SIG_SMALL_IMM || |
| b_sig == QPU_SIG_SMALL_IMM || |
| a_sig == QPU_SIG_BRANCH || |
| b_sig == QPU_SIG_BRANCH) { |
| return 0; |
| } |
| |
| ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, |
| QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); |
| |
| /* Misc fields that have to match exactly. */ |
| ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0); |
| |
| if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK, |
| QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { |
| /* Since we tend to use regfile A by default both for register |
| * allocation and for our special values (uniforms and |
| * varyings), try swapping uniforms and varyings to regfile B |
| * to resolve raddr A conflicts. |
| */ |
| if (!try_swap_ra_file(&merge, &a, &b) && |
| !try_swap_ra_file(&merge, &b, &a)) { |
| return 0; |
| } |
| } |
| |
| ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK, |
| QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); |
| |
| ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK, |
| QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD)); |
| ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK, |
| QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); |
| |
| /* Allow disagreement on WS (swapping A vs B physical reg file as the |
| * destination for ADD/MUL) if one of the original instructions |
| * ignores it (probably because it's just writing to accumulators). |
| */ |
| if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) && |
| qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) { |
| merge = (merge & ~QPU_WS) | (b & QPU_WS); |
| } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) && |
| qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) { |
| merge = (merge & ~QPU_WS) | (a & QPU_WS); |
| } else { |
| if ((a & QPU_WS) != (b & QPU_WS)) |
| return 0; |
| } |
| |
| if (!merge_fields(&merge, a, b, QPU_PM, ~0)) { |
| /* If one instruction has PM bit set and the other not, the |
| * one without PM shouldn't do packing/unpacking, and we |
| * have to make sure non-NOP packing/unpacking from PM |
| * instruction aren't added to it. |
| */ |
| uint64_t temp; |
| |
| /* Let a be the one with PM bit */ |
| if (!(a & QPU_PM)) { |
| temp = a; |
| a = b; |
| b = temp; |
| } |
| |
| if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0) |
| return 0; |
| |
| if ((a & QPU_PACK_MASK) != 0 && |
| QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) |
| return 0; |
| |
| if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b)) |
| return 0; |
| } else { |
| /* packing: Make sure that non-NOP packs agree, then deal with |
| * special-case failing of adding a non-NOP pack to something |
| * with a NOP pack. |
| */ |
| if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0)) |
| return 0; |
| bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) != |
| QPU_GET_FIELD(merge, QPU_PACK)); |
| bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) != |
| QPU_GET_FIELD(merge, QPU_PACK)); |
| if (!(merge & QPU_PM)) { |
| /* Make sure we're not going to be putting a new |
| * a-file packing on either half. |
| */ |
| if (new_a_pack && writes_a_file(a)) |
| return 0; |
| |
| if (new_b_pack && writes_a_file(b)) |
| return 0; |
| } else { |
| /* Make sure we're not going to be putting new MUL |
| * packing oneither half. |
| */ |
| if (new_a_pack && |
| QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP) |
| return 0; |
| |
| if (new_b_pack && |
| QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) |
| return 0; |
| } |
| |
| /* unpacking: Make sure that non-NOP unpacks agree, then deal |
| * with special-case failing of adding a non-NOP unpack to |
| * something with a NOP unpack. |
| */ |
| if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0)) |
| return 0; |
| bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) != |
| QPU_GET_FIELD(merge, QPU_UNPACK)); |
| bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) != |
| QPU_GET_FIELD(merge, QPU_UNPACK)); |
| if (!(merge & QPU_PM)) { |
| /* Make sure we're not going to be putting a new |
| * a-file packing on either half. |
| */ |
| if (new_a_unpack && |
| QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP) |
| return 0; |
| |
| if (new_b_unpack && |
| QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP) |
| return 0; |
| } else { |
| /* Make sure we're not going to be putting new r4 |
| * unpack on either half. |
| */ |
| if (new_a_unpack && reads_r4(a)) |
| return 0; |
| |
| if (new_b_unpack && reads_r4(b)) |
| return 0; |
| } |
| } |
| |
| if (ok) |
| return merge; |
| else |
| return 0; |
| } |
| |
| uint64_t |
| qpu_set_sig(uint64_t inst, uint32_t sig) |
| { |
| assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE); |
| return QPU_UPDATE_FIELD(inst, sig, QPU_SIG); |
| } |
| |
| uint64_t |
| qpu_set_cond_add(uint64_t inst, uint32_t cond) |
| { |
| assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS); |
| return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD); |
| } |
| |
| uint64_t |
| qpu_set_cond_mul(uint64_t inst, uint32_t cond) |
| { |
| assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS); |
| return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL); |
| } |
| |
| bool |
| qpu_waddr_is_tlb(uint32_t waddr) |
| { |
| switch (waddr) { |
| case QPU_W_TLB_COLOR_ALL: |
| case QPU_W_TLB_COLOR_MS: |
| case QPU_W_TLB_Z: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| qpu_inst_is_tlb(uint64_t inst) |
| { |
| uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); |
| |
| return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || |
| qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) || |
| sig == QPU_SIG_COLOR_LOAD || |
| sig == QPU_SIG_WAIT_FOR_SCOREBOARD); |
| } |
| |
/**
 * Looks up the small-immediate encoding (the value to place in the raddr b
 * field) for the 32-bit value i.
 *
 * - 0..15 encode as themselves.
 * - -16..-1 (as two's complement) encode as 16..31.
 * - Sixteen specific bit patterns encode as 32..47; read as IEEE-754 single
 *   precision they are 1.0, 2.0, ..., 128.0 followed by 1/256, 1/128, ...,
 *   1/2.
 *
 * \return the encoding, or ~0 if i cannot be represented.
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* Entry n encodes as 32 + n. */
        static const uint32_t pattern_imms[] = {
                0x3f800000, 0x40000000, 0x40800000, 0x41000000,
                0x41800000, 0x42000000, 0x42800000, 0x43000000,
                0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
                0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,
        };
        const int as_signed = (int)i;

        if (i <= 15)
                return i;

        if (as_signed < 0 && as_signed >= -16)
                return i + 32;

        for (uint32_t n = 0;
             n < sizeof(pattern_imms) / sizeof(pattern_imms[0]); n++) {
                if (pattern_imms[n] == i)
                        return 32 + n;
        }

        return ~0;
}
| |
| void |
| qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) |
| { |
| if (c->qpu_inst_count >= c->qpu_inst_size) { |
| c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2); |
| c->qpu_insts = reralloc(c, c->qpu_insts, |
| uint64_t, c->qpu_inst_size); |
| } |
| c->qpu_insts[c->qpu_inst_count++] = inst; |
| } |