/*
* Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nvc0_pc.h"
#include "nvc0_program.h"
#define DESCEND_ARBITRARY(j, f) \
do { \
b->pass_seq = ctx->pc->pass_seq; \
\
for (j = 0; j < 2; ++j) \
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
f(ctx, b->out[j]); \
} while (0)
static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
if (a->reg.file != b->reg.file)
return FALSE;
if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
return FALSE;
assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
if (a->join->reg.id < b->join->reg.id) {
return (a->join->reg.id + a->reg.size >= b->join->reg.id);
} else
if (a->join->reg.id > b->join->reg.id) {
return (b->join->reg.id + b->reg.size >= a->join->reg.id);
}
return FALSE;
}
static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
return FALSE;
if (NV_IS_MEMORY_FILE(a->reg.file))
return a->reg.address == b->reg.address;
else
return a->join->reg.id == b->join->reg.id;
}
#if 0
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
int si, di;
for (di = 0; di < 4 && a->def[di]; ++di)
for (si = 0; si < 5 && b->src[si]; ++si)
if (registers_interfere(a->def[di], b->src[si]->value))
return FALSE;
return TRUE;
}
/* Check whether we can swap the order of the instructions,
* where a & b may be either the earlier or the later one.
*/
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
#endif
static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
if (nvi->opcode == NV_OP_ST)
return FALSE;
return (!(nvi->terminator ||
nvi->join ||
nvi->target ||
nvi->fixed ||
nvc0_insn_refcount(nvi)));
}
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
return TRUE;
if (nvi->terminator || nvi->join)
return FALSE;
if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
return TRUE;
if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
return FALSE;
if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
return FALSE;
if (nvi->src[0]->value->join->reg.id < 0) {
NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
return TRUE;
}
if (nvi->opcode == NV_OP_SELECT)
if (!values_equal(nvi->def[0], nvi->src[1]->value))
return FALSE;
return values_equal(nvi->def[0], nvi->src[0]->value);
}
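/* Illustrative example (hypothetical register numbers): after coalescing,
 * a "MOV $r4 $r4" whose def and src map to the same register id satisfies
 * values_equal() and is deleted before emission.
 */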
struct nv_pass {
struct nv_pc *pc;
int n;
void *priv;
};
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
struct nv_pc *pc = (struct nv_pc *)priv;
struct nv_basic_block *in;
struct nv_instruction *nvi, *next;
int j;
/* find the last block that has already been given an emit size */
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
if (j >= 0) {
in = pc->bb_list[j];
/* check for no-op branches (BRA $PC+8) */
if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
in->emit_size -= 8;
pc->emit_size -= 8;
for (++j; j < pc->num_blocks; ++j)
pc->bb_list[j]->emit_pos -= 8;
nvc0_insn_delete(in->exit);
}
b->emit_pos = in->emit_pos + in->emit_size;
}
pc->bb_list[pc->num_blocks++] = b;
/* visit node */
for (nvi = b->entry; nvi; nvi = next) {
next = nvi->next;
if (inst_is_noop(nvi) ||
(pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
nvc0_insn_delete(nvi);
} else
b->emit_size += 8;
}
pc->emit_size += b->emit_size;
#ifdef NOUVEAU_DEBUG
if (!b->entry)
debug_printf("BB:%i is now empty\n", b->id);
else
debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}
static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass pass;
pass.pc = pc;
pc->pass_seq++;
nv_pass_flatten(&pass, root);
nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
return 0;
}
int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
int i, ret;
NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
pc->num_blocks = 0; /* will reorder bb_list */
for (i = 0; i < pc->num_subroutines + 1; ++i)
if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
return ret;
return 0;
}
static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
if (!nvi)
return FALSE;
assert(nvi->indirect != 0);
return (nvi->opcode == NV_OP_LD &&
nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}
static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
if (!nvi)
return FALSE;
return (nvi->opcode == NV_OP_MOV &&
nvi->src[0]->value->reg.file == NV_FILE_IMM &&
nvi->src[0]->value->reg.size == 4);
}
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
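/* Swapping the operands of a comparison mirrors its condition code.
 * Assuming the usual 3-bit mask (1 = LT, 2 = EQ, 4 = GT), the table
 * exchanges the LT and GT bits, e.g. LE (3) becomes GE (6).
 */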
static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
struct nv_ref *src0 = nvi->src[0];
struct nv_ref *src1 = nvi->src[1];
if (!nv_op_commutative(nvi->opcode))
return;
assert(src0 && src1 && src0->value && src1->value);
if (is_cspace_load(src0->value->insn)) {
if (!is_cspace_load(src1->value->insn)) {
nvi->src[0] = src1;
nvi->src[1] = src0;
}
}
if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
nvi->set_cond = cc_swapped[nvi->set_cond];
}
static void
nvi_set_indirect_load(struct nv_pc *pc,
struct nv_instruction *nvi, struct nv_value *val)
{
for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
++nvi->indirect);
assert(nvi->indirect < 6);
nv_reference(pc, nvi, nvi->indirect, val);
}
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *nvi, *ld;
int s;
for (nvi = b->entry; nvi; nvi = nvi->next) {
check_swap_src_0_1(nvi);
for (s = 0; s < 3 && nvi->src[s]; ++s) {
ld = nvi->src[s]->value->insn;
if (!ld || ld->opcode != NV_OP_LD)
continue;
if (!nvc0_insn_can_load(nvi, s, ld))
continue;
/* fold it! */
nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
if (ld->indirect >= 0)
nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
if (!nvc0_insn_refcount(ld))
nvc0_insn_delete(ld);
}
}
DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
return 0;
}
static INLINE uint
modifiers_opcode(uint8_t mod)
{
switch (mod) {
case NV_MOD_NEG: return NV_OP_NEG;
case NV_MOD_ABS: return NV_OP_ABS;
case 0: return NV_OP_MOV;
default: return NV_OP_NOP;
}
}
/* NOTE: Assumes loads have not yet been folded. */
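/* Illustrative example (hypothetical IR): this pass rewrites
 *   NEG $r1 $r0
 *   ADD $r2 $r1 $r3
 * into
 *   ADD $r2 -$r0 $r3
 * provided $r1 has no other uses, the NEG is not predicated, and ADD
 * supports the NEG source modifier.
 */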
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *nvi, *mi, *next;
int j;
uint8_t mod;
for (nvi = b->entry; nvi; nvi = next) {
next = nvi->next;
if (nvi->opcode == NV_OP_SUB) {
nvi->src[1]->mod ^= NV_MOD_NEG;
nvi->opcode = NV_OP_ADD;
}
for (j = 0; j < 3 && nvi->src[j]; ++j) {
mi = nvi->src[j]->value->insn;
if (!mi)
continue;
if (mi->def[0]->refc > 1 || mi->predicate >= 0)
continue;
if (NV_BASEOP(mi->opcode) == NV_OP_NEG)
mod = NV_MOD_NEG;
else
if (NV_BASEOP(mi->opcode) == NV_OP_ABS)
mod = NV_MOD_ABS;
else
continue;
assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
mod |= mi->src[0]->mod;
if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
/* abs neg [abs] = abs */
mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
} else
if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
/* neg as opcode and modifier on same insn cannot occur */
/* neg neg abs = abs, neg neg = identity */
assert(j == 0);
if (mod & NV_MOD_ABS)
nvi->opcode = NV_OP_ABS;
else
nvi->opcode = NV_OP_MOV;
mod = 0;
}
if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
continue;
nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
nvi->src[j]->mod ^= mod;
}
if (nvi->opcode == NV_OP_SAT) {
mi = nvi->src[0]->value->insn;
/* the source might not have a defining instruction (e.g. an input) */
if (!mi || mi->def[0]->refc > 1 ||
(mi->opcode != NV_OP_ADD &&
mi->opcode != NV_OP_MUL &&
mi->opcode != NV_OP_MAD))
continue;
mi->saturate = 1;
mi->def[0] = nvi->def[0];
mi->def[0]->insn = mi;
nvc0_insn_delete(nvi);
}
}
DESCEND_ARBITRARY(j, nv_pass_lower_mods);
return 0;
}
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
/*
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
if (mod & NV_MOD_ABS) {
if (type == NV_TYPE_F32)
*val &= 0x7fffffff;
else
if ((*val) & (1 << 31))
*val = ~(*val) + 1;
}
if (mod & NV_MOD_NEG) {
if (type == NV_TYPE_F32)
*val ^= 0x80000000;
else
*val = ~(*val) + 1;
}
}
*/
#if 0
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
struct nv_value *src0, struct nv_value *src1)
{
struct nv_value *val;
union {
float f32;
uint32_t u32;
int32_t s32;
} u0, u1, u;
ubyte type;
if (!nvi->def[0])
return;
type = NV_OPTYPE(nvi->opcode);
u.u32 = 0;
u0.u32 = src0->reg.imm.u32;
u1.u32 = src1->reg.imm.u32;
modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
switch (nvi->opcode) {
case NV_OP_MAD:
if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
return;
/* fall through */
case NV_OP_MUL:
switch (type) {
case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
default:
assert(0);
break;
}
break;
case NV_OP_ADD:
switch (type) {
case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
default:
assert(0);
break;
}
break;
case NV_OP_SUB:
switch (type) {
case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
default:
assert(0);
break;
}
break;
default:
return;
}
nvi->opcode = NV_OP_MOV;
val = new_value(pc, NV_FILE_IMM, type);
val->reg.imm.u32 = u.u32;
nv_reference(pc, nvi, 1, NULL);
nv_reference(pc, nvi, 0, val);
if (nvi->src[2]) { /* from MAD */
nvi->src[1] = nvi->src[0];
nvi->src[0] = nvi->src[2];
nvi->src[2] = NULL;
nvi->opcode = NV_OP_ADD;
if (val->reg.imm.u32 == 0) {
nvi->src[1] = NULL;
nvi->opcode = NV_OP_MOV;
}
}
}
static void
constant_operand(struct nv_pc *pc,
struct nv_instruction *nvi, struct nv_value *val, int s)
{
union {
float f32;
uint32_t u32;
int32_t s32;
} u;
int t = s ? 0 : 1;
uint op;
ubyte type;
if (!nvi->def[0])
return;
type = NV_OPTYPE(nvi->opcode);
u.u32 = val->reg.imm.u32;
modifiers_apply(&u.u32, type, nvi->src[s]->mod);
switch (NV_BASEOP(nvi->opcode)) {
case NV_OP_MUL:
if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
(NV_TYPE_ISINT(type) && u.u32 == 1)) {
if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
break;
nvi->opcode = op;
nv_reference(pc, nvi, s, NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
} else
if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
(NV_TYPE_ISINT(type) && u.u32 == 2)) {
nvi->opcode = NV_OP_ADD;
nv_reference(pc, nvi, s, nvi->src[t]->value);
nvi->src[s]->mod = nvi->src[t]->mod;
} else
if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
if (nvi->src[t]->mod & NV_MOD_NEG)
nvi->opcode = NV_OP_MOV;
else
nvi->opcode = NV_OP_NEG;
nv_reference(pc, nvi, s, NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
} else
if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
nvi->opcode = NV_OP_ADD;
nv_reference(pc, nvi, s, nvi->src[t]->value);
nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
} else
if (u.u32 == 0) {
nvi->opcode = NV_OP_MOV;
nv_reference(pc, nvi, t, NULL);
if (s) {
nvi->src[0] = nvi->src[1];
nvi->src[1] = NULL;
}
}
break;
case NV_OP_ADD:
if (u.u32 == 0) {
if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
break;
nvi->opcode = op;
nv_reference(pc, nvi, s, NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
}
break;
case NV_OP_RCP:
u.f32 = 1.0f / u.f32;
(val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
break;
case NV_OP_RSQ:
u.f32 = 1.0f / sqrtf(u.f32);
(val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
break;
default:
break;
}
}
#endif
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
struct nv_instruction *nvi, *next;
int j;
for (nvi = b->entry; nvi; nvi = next) {
struct nv_value *src0, *src1, *src;
int mod;
next = nvi->next;
src0 = nvcg_find_immediate(nvi->src[0]);
src1 = nvcg_find_immediate(nvi->src[1]);
if (src0 && src1)
constant_expression(ctx->pc, nvi, src0, src1);
else {
if (src0)
constant_operand(ctx->pc, nvi, src0, 0);
else
if (src1)
constant_operand(ctx->pc, nvi, src1, 1);
}
/* try to combine MUL, ADD into MAD */
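/* e.g. (illustrative): MUL $t $a $b + ADD $d $t $c -> MAD $d $a $b $c,
 * legal only while the MUL result $t is referenced exactly once */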
if (nvi->opcode != NV_OP_ADD)
continue;
src0 = nvi->src[0]->value;
src1 = nvi->src[1]->value;
if (SRC_IS_MUL(src0) && src0->refc == 1)
src = src0;
else
if (SRC_IS_MUL(src1) && src1->refc == 1)
src = src1;
else
continue;
/* could have an immediate from above constant_* */
if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
continue;
nvi->opcode = NV_OP_MAD;
mod = nvi->src[(src == src0) ? 0 : 1]->mod;
nv_reference(ctx->pc, nvi, (src == src0) ? 0 : 1, NULL);
nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
assert(!(mod & ~NV_MOD_NEG));
nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
nvi->src[1]->mod = src->insn->src[1]->mod;
}
DESCEND_ARBITRARY(j, nv_pass_lower_arith);
#endif
return 0;
}
/* TODO: redundant store elimination */
struct mem_record {
struct mem_record *next;
struct nv_instruction *insn;
uint32_t ofst; /* start address of the (merged) access */
uint32_t base; /* value number of the indirect address source, or 0 */
uint32_t size; /* total size in bytes */
};
#define MEM_RECORD_POOL_SIZE 1024
struct pass_reld_elim {
struct nv_pc *pc;
struct mem_record *imm; /* immediate loads */
struct mem_record *mem_v; /* varying outputs (EXPORT) */
struct mem_record *mem_a; /* vertex attributes (VFETCH) */
struct mem_record *mem_c[16]; /* constant buffers c0[] through c15[] */
struct mem_record *mem_l; /* local memory */
struct mem_record pool[MEM_RECORD_POOL_SIZE];
int alloc;
};
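/* Merge load @ld into the recorded load @rec->insn; the caller has already
 * verified that the two accesses are adjacent. The defs of the recorded
 * load keep the lower slots; if @ld reads the lower address, the existing
 * defs are shifted up first. Merging is skipped when the combined access
 * would be misaligned (8-byte accesses need 4-byte alignment, larger ones
 * 8-byte alignment).
 */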
static void
combine_load(struct mem_record *rec, struct nv_instruction *ld)
{
struct nv_instruction *fv = rec->insn;
struct nv_value *mem = ld->src[0]->value;
uint32_t size = rec->size + mem->reg.size;
int j;
int d = rec->size / 4;
assert(rec->size < 16);
if (rec->ofst > mem->reg.address) {
if ((size == 8 && mem->reg.address & 3) ||
(size > 8 && mem->reg.address & 7))
return;
rec->ofst = mem->reg.address;
for (j = 0; j < d; ++j)
fv->def[d + j] = fv->def[j];
d = 0;
} else
if ((size == 8 && rec->ofst & 3) ||
(size > 8 && rec->ofst & 7)) {
return;
}
for (j = 0; j < mem->reg.size / 4; ++j) {
fv->def[d] = ld->def[j];
fv->def[d++]->insn = fv;
}
fv->src[0]->value->reg.size = rec->size = size;
nvc0_insn_delete(ld);
}
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{
/* TODO: vectorize exports the way combine_load does for loads (stub) */
}
static INLINE void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
struct mem_record *it = &ctx->pool[ctx->alloc++];
it->next = *rec;
*rec = it;
it->base = base;
it->ofst = ofst;
it->insn = nvi;
it->size = nvi->src[0]->value->reg.size;
}
/* vectorize and reuse loads from memory or of immediates */
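/* Illustrative sketch (hypothetical IR): two adjacent constant buffer loads
 *   LD $r0 c0[0x10]
 *   LD $r1 c0[0x14]
 * in the same 16-byte line are merged into a single 64-bit load. Records
 * are keyed on the indirect address value (base) and the offset; 12-byte
 * LDs are never formed.
 */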
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
struct mem_record **rec, *it;
struct nv_instruction *ld, *next;
struct nv_value *mem;
uint32_t base, ofst;
int s;
for (ld = b->entry; ld; ld = next) {
next = ld->next;
if (is_cspace_load(ld)) {
mem = ld->src[0]->value;
rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
} else
if (ld->opcode == NV_OP_VFETCH) {
mem = ld->src[0]->value;
rec = &ctx->mem_a;
} else
if (ld->opcode == NV_OP_EXPORT) {
mem = ld->src[0]->value;
if (mem->reg.file != NV_FILE_MEM_V)
continue;
rec = &ctx->mem_v;
} else {
continue;
}
if (ld->def[0] && ld->def[0]->refc == 0)
continue;
ofst = mem->reg.address;
base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
for (it = *rec; it; it = it->next) {
if (it->base == base &&
((it->ofst >> 4) == (ofst >> 4)) &&
((it->ofst + it->size == ofst) ||
(it->ofst - mem->reg.size == ofst))) {
if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
continue;
if (it->ofst < ofst) {
if ((it->ofst & 0xf) == 4)
continue;
} else
if ((ofst & 0xf) == 4)
continue;
break;
}
}
if (it) {
switch (ld->opcode) {
case NV_OP_EXPORT: combine_export(it, ld); break;
default:
combine_load(it, ld);
break;
}
} else
if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
add_mem_record(ctx, rec, base, ofst, ld);
}
}
DESCEND_ARBITRARY(s, nv_pass_mem_opt);
return 0;
}
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
/* TODO: delete the recorded store if @st completely overwrites it (stub) */
}
/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
struct mem_record **rec, *it;
struct nv_instruction *st, *next;
struct nv_value *mem;
uint32_t base, ofst, size;
int s;
for (st = b->entry; st; st = next) {
next = st->next;
if (st->opcode == NV_OP_ST) {
mem = st->src[0]->value;
rec = &ctx->mem_l;
} else
if (st->opcode == NV_OP_EXPORT) {
mem = st->src[0]->value;
if (mem->reg.file != NV_FILE_MEM_V)
continue;
rec = &ctx->mem_v;
} else {
/* TODO: purge aliasing records here instead of skipping other stores */
continue;
}
ofst = mem->reg.address;
base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
size = mem->reg.size;
for (it = *rec; it; it = it->next) {
if (it->base == base &&
(it->ofst <= ofst && (it->ofst + size) > ofst))
break;
}
if (it)
eliminate_store(it, st);
else
add_mem_record(ctx, rec, base, ofst, st);
}
DESCEND_ARBITRARY(s, pass_store_elim);
return 0;
}
/* TODO: properly handle loads from l[] memory in the presence of stores */
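/* The disabled body below appears to be carried over from the nv50 code
 * generator (it references mem_s, load_record and NV_OP_LDA, none of which
 * exist here) and is kept for reference only. Sketch of the idea
 * (hypothetical IR):
 *   MOV $r0 0x3f800000
 *   ...
 *   MOV $r1 0x3f800000
 * The second load of the same immediate is redundant; uses of $r1 would be
 * redirected to $r0 and the MOV left for DCE to remove.
 */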
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
#if 0
struct load_record **rec, *it;
struct nv_instruction *ld, *next;
uint64_t data[2];
struct nv_value *val;
int j;
for (ld = b->entry; ld; ld = next) {
next = ld->next;
if (!ld->src[0])
continue;
val = ld->src[0]->value;
rec = NULL;
if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
data[0] = val->reg.id;
data[1] = 0;
rec = &ctx->mem_v;
} else
if (ld->opcode == NV_OP_LDA) {
data[0] = val->reg.id;
data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
if (val->reg.file >= NV_FILE_MEM_C(0) &&
val->reg.file <= NV_FILE_MEM_C(15))
rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
else
if (val->reg.file == NV_FILE_MEM_S)
rec = &ctx->mem_s;
else
if (val->reg.file == NV_FILE_MEM_L)
rec = &ctx->mem_l;
} else
if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
data[0] = val->reg.imm.u32;
data[1] = 0;
rec = &ctx->imm;
}
if (!rec || !ld->def[0]->refc)
continue;
for (it = *rec; it; it = it->next)
if (it->data[0] == data[0] && it->data[1] == data[1])
break;
if (it) {
if (ld->def[0]->reg.id >= 0)
it->value = ld->def[0];
else
if (!ld->fixed)
nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
} else {
if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
continue;
it = &ctx->pool[ctx->alloc++];
it->next = *rec;
it->data[0] = data[0];
it->data[1] = data[1];
it->value = ld->def[0];
*rec = it;
}
}
ctx->imm = NULL;
ctx->mem_s = NULL;
ctx->mem_v = NULL;
for (j = 0; j < 16; ++j)
ctx->mem_c[j] = NULL;
ctx->mem_l = NULL;
ctx->alloc = 0;
DESCEND_ARBITRARY(j, nv_pass_reload_elim);
#endif
return 0;
}
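/* Derive the write mask of each texture instruction from the reference
 * counts of its defs and compact the live defs into the leading slots.
 * E.g. (illustrative) a TEX whose .y and .z results are unused gets
 * tex_mask = 0x9, with the x and w defs moved to slots 0 and 1.
 */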
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
int i, c, j;
for (i = 0; i < ctx->pc->num_instructions; ++i) {
struct nv_instruction *nvi = &ctx->pc->instructions[i];
struct nv_value *def[4];
if (!nv_is_texture_op(nvi->opcode))
continue;
nvi->tex_mask = 0;
for (c = 0; c < 4; ++c) {
if (nvi->def[c]->refc)
nvi->tex_mask |= 1 << c;
def[c] = nvi->def[c];
}
j = 0;
for (c = 0; c < 4; ++c)
if (nvi->tex_mask & (1 << c))
nvi->def[j++] = def[c];
for (c = 0; c < 4; ++c)
if (!(nvi->tex_mask & (1 << c)))
nvi->def[j++] = def[c];
assert(j == 4);
}
return 0;
}
struct nv_pass_dce {
struct nv_pc *pc;
uint removed;
};
static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
int j;
struct nv_instruction *nvi, *next;
for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
next = nvi->next;
if (inst_removable(nvi)) {
nvc0_insn_delete(nvi);
++ctx->removed;
}
}
DESCEND_ARBITRARY(j, nv_pass_dce);
return 0;
}
#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
* BREAK and dummy ELSE block.
*/
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
if (!bb->out[0] || !bb->out[1])
return FALSE;
if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
!bb->out[1]->out[1]);
} else {
return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
!bb->out[0]->out[1] &&
!bb->out[1]->out[1]);
}
}
/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
struct nv_value *p, ubyte cc)
{
}
#endif
/* NOTE: Run this after register allocation; we can then just cut out the
 * cflow instructions and hook the predicates up to the conditional OPs if
 * they are not using immediates. This is better than inserting SELECT to
 * join definitions.
 *
 * NOTE: The prior optimizations should be adapted to make this possible
 * more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
/* not implemented yet, see the NOTEs above */
return 0;
}
/* local common subexpression elimination, stupid O(n^2) implementation */
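/* E.g. (illustrative): if two "ADD $r2 $r0 $r1" appear in the same block,
 * the later one is deleted and its def replaced with the earlier def.
 * The disabled body below still uses nv50-era opcodes (LDA, STA).
 */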
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
int s;
unsigned int reps;
do {
reps = 0;
for (ir = entry; ir; ir = next) {
next = ir->next;
for (ik = entry; ik != ir; ik = ik->next) {
if (ir->opcode != ik->opcode || ir->fixed)
continue;
if (!ir->def[0] || !ik->def[0] ||
ik->opcode == NV_OP_LDA ||
ik->opcode == NV_OP_STA ||
ik->opcode == NV_OP_MOV ||
nv_is_vector_op(ik->opcode))
continue; /* ignore loads, stores & moves */
if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
if (ik->flags_src || ir->flags_src ||
ik->flags_def || ir->flags_def)
continue; /* and also not with flags, for now */
if (ik->def[0]->reg.file == NV_FILE_OUT ||
ir->def[0]->reg.file == NV_FILE_OUT ||
!values_equal(ik->def[0], ir->def[0]))
continue;
for (s = 0; s < 3; ++s) {
struct nv_value *a, *b;
if (!ik->src[s]) {
if (ir->src[s])
break;
continue;
}
if (ik->src[s]->mod != ir->src[s]->mod)
break;
a = ik->src[s]->value;
b = ir->src[s]->value;
if (a == b)
continue;
if (a->reg.file != b->reg.file ||
a->reg.id < 0 ||
a->reg.id != b->reg.id)
break;
}
if (s == 3) {
nvc0_insn_delete(ir);
++reps;
nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}
}
} while(reps);
DESCEND_ARBITRARY(s, nv_pass_cse);
#endif
return 0;
}
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
struct pass_reld_elim *reldelim;
struct nv_pass pass;
struct nv_pass_dce dce;
int ret;
pass.n = 0;
pass.pc = pc;
/* Do this first, so we don't have to pay attention
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
ret = nv_pass_lower_arith(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
ret = nv_pass_lower_mods(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
ret = nvc0_pass_fold_loads(&pass, root);
if (ret)
return ret;
if (pc->opt_reload_elim) {
reldelim = CALLOC_STRUCT(pass_reld_elim);
reldelim->pc = pc;
pc->pass_seq++;
ret = nv_pass_reload_elim(reldelim, root);
if (ret) {
FREE(reldelim);
return ret;
}
memset(reldelim, 0, sizeof(struct pass_reld_elim));
reldelim->pc = pc;
}
pc->pass_seq++;
ret = nv_pass_cse(&pass, root);
if (ret)
return ret;
dce.pc = pc;
do {
dce.removed = 0;
pc->pass_seq++;
ret = nv_pass_dce(&dce, root);
if (ret)
return ret;
} while (dce.removed);
if (pc->opt_reload_elim) {
pc->pass_seq++;
ret = nv_pass_mem_opt(reldelim, root);
if (!ret) {
memset(reldelim, 0, sizeof(struct pass_reld_elim));
reldelim->pc = pc;
pc->pass_seq++;
ret = nv_pass_mem_opt(reldelim, root);
}
FREE(reldelim);
if (ret)
return ret;
}
ret = nv_pass_tex_mask(&pass, root);
return ret;
}
int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
int i, ret;
for (i = 0; i < pc->num_subroutines + 1; ++i)
if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
return ret;
return 0;
}