nouveau: fix regression since float comparison instructions were added (v2)
Fix the return type and allow the src and dst types for a comparison
to be separate. This at least fixes the two test cases I've written.
v2: drop the u32->s32 change
Acked-by: Christoph Bumiller <christoph.bumiller@speed.at>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 70e5e22..6d9c830 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -219,13 +219,13 @@
}
CmpInstruction *
-BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
- Value *src0, Value *src1, Value *src2)
+BuildUtil::mkCmp(operation op, CondCode cc, DataType dstTy, Value *dst,
+ DataType srcTy, Value *src0, Value *src1, Value *src2)
{
CmpInstruction *insn = new_CmpInstruction(func, op);
insn->setType((dst->reg.file == FILE_PREDICATE ||
- dst->reg.file == FILE_FLAGS) ? TYPE_U8 : ty, ty);
+ dst->reg.file == FILE_FLAGS) ? TYPE_U8 : dstTy, srcTy);
insn->setCondition(cc);
insn->setDef(0, dst);
insn->setSrc(0, src0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
index 2305a27..a610c77 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
@@ -74,7 +74,7 @@
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
CmpInstruction *mkCmp(operation, CondCode, DataType,
Value *,
- Value *, Value *, Value * = NULL);
+ DataType, Value *, Value *, Value * = NULL);
TexInstruction *mkTex(operation, TexTarget,
uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 3193ea6..49a45f8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1776,7 +1776,7 @@
mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
- mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
+ mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
}
}
@@ -2315,8 +2315,8 @@
src0 = fetchSrc(0, c);
val0 = getScratch();
val1 = getScratch();
- mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
- mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
+ mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
+ mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
if (srcTy == TYPE_F32)
mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
else
@@ -2333,7 +2333,7 @@
mkMov(dst0[c], src1);
else
mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
- srcTy, dst0[c], src1, src2, src0);
+ srcTy, dst0[c], srcTy, src1, src2, src0);
}
break;
case TGSI_OPCODE_FRC:
@@ -2380,13 +2380,13 @@
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
- mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
+ mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
}
break;
case TGSI_OPCODE_KILL_IF:
val0 = new_LValue(func, FILE_PREDICATE);
for (c = 0; c < 4; ++c) {
- mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
+ mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
}
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 56eaad3..caaf09f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -423,7 +423,7 @@
expandIntegerMUL(&bld,
bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q, b));
bld.mkOp2(OP_SUB, TYPE_U32, (m = bld.getSSA()), a, t);
- bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), m, b);
+ bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, b);
if (!isSignedType(ty)) {
div->op = OP_SUB;
div->setSrc(0, q);
@@ -1011,7 +1011,7 @@
return;
cdst = bld.getSSA(1, FILE_FLAGS);
- bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, bld.loadImm(NULL, 0), pred);
+ bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, insn->dType, bld.loadImm(NULL, 0), pred);
insn->setPredicate(insn->cc, cdst);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 8d94dd1..a838004 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1493,7 +1493,7 @@
// CAUTION: don't use pdst->getInsn, the definition might not be unique,
// delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
- bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, bld.mkImm(0), pred);
+ bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred);
insn->setPredicate(insn->cc, pdst);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 99bd2bf..fb82c72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -751,7 +751,7 @@
else
tB = tA;
tA = bld.getSSA();
- bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0));
+ bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
if (d < 0)