- Remove Tilmann's custom truncate lowering: it completely hosed
DAGCombine's ability to remove truncates that were not needed. Consequently,
the CellSPU backend would produce correct, but _really slow and horrible_,
code.
Replaced it with instruction sequences in SPUInstrInfo.td that perform the
equivalent truncation, as sketched below.
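
As a rough illustration (not from this patch; the function names here are
made up), this is the kind of truncation the .td patterns now cover,
including the trunc-then-extend shape that DAGCombine can often fold away
once the lowering stays generic:

    #include <stdint.h>
    #include <stdio.h>

    /* i64 -> i32 truncate: keep only the low 32 bits */
    uint32_t trunc64(uint64_t x) {
      return (uint32_t) x;
    }

    /* trunc i64 -> i32 followed by zext back to i64; with generic
       lowering, DAGCombine can often reduce this pair to a mask of
       the low 32 bits instead of an explicit truncate */
    uint64_t trunc_then_zext(uint64_t x) {
      return (uint32_t) x;
    }

    int main(void) {
      printf("0x%x\n", trunc64(0x123456789ULL));    /* 0x23456789 */
      printf("0x%llx\n",
             (unsigned long long) trunc_then_zext(0x123456789ULL));
      return 0;
    }
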
- Re-examine how unaligned loads and stores work. The generated unaligned
load code has been tested on CellSPU hardware; see i32operations.c and
i64operations.c in CodeGen/CellSPU/useful-harnesses, and the sketch after
this item. (While these are toy tests, they do show that some real-world
code compiles correctly.)
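
A minimal sketch of the pattern those harnesses exercise (hypothetical
code, not the harness source): an i32 load from an unaligned address, which
the backend must expand into aligned quadword loads plus byte shuffles,
since the SPU only has 16-byte-aligned memory access:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* portable unaligned i32 load; on CellSPU this lowers to aligned
       16-byte loads plus a shuffle to extract the misaligned word */
    uint32_t load_u32(const uint8_t *p) {
      uint32_t v;
      memcpy(&v, p, sizeof v);
      return v;
    }

    int main(void) {
      uint8_t buf[8] = {0xAA, 0x11, 0x22, 0x33, 0x44, 0xBB, 0xCC, 0xDD};
      /* buf + 1 is not 4-byte aligned; the printed value depends on
         host byte order (0x11223344 big-endian, 0x44332211 little) */
      printf("0x%08x\n", load_u32(buf + 1));
      return 0;
    }
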
- Fix truncating stores from bug 3193 (note: unpack_df.ll will still make
llc fault because i64 ult is not yet implemented; both shapes are sketched
below).
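
For reference, the two shapes at the source level (illustrative only; the
names are invented):

    #include <stdint.h>
    #include <stdio.h>

    /* truncating store: an i64 value stored through an i32 pointer,
       the shape this change fixes */
    void store_low_word(uint64_t v, uint32_t *p) {
      *p = (uint32_t) v;
    }

    /* i64 ult: the unsigned 64-bit compare llc still lacks, which is
       why unpack_df.ll continues to fault */
    int ult64(uint64_t a, uint64_t b) {
      return a < b;
    }

    int main(void) {
      uint32_t w = 0;
      store_low_word(0xDEADBEEFCAFEBABEULL, &w);
      printf("0x%08x %d\n", w, ult64(1, 2));  /* 0xcafebabe 1 */
      return 0;
    }
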
- Added i64 eq and neq for setcc and select/setcc (a toy exerciser follows);
started a new instruction information file for them, SPU64InstrInfo.td.
Additional i64 operations should be added to this file, not to
SPUInstrInfo.td.
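
A toy exerciser for the new operations, in the spirit of the
useful-harnesses tests (hypothetical, not part of this patch):

    #include <stdint.h>
    #include <stdio.h>

    /* setcc: i64 eq / neq */
    int eq64(uint64_t a, uint64_t b)  { return a == b; }
    int neq64(uint64_t a, uint64_t b) { return a != b; }

    /* select/setcc: a select driven by an i64 equality compare */
    uint64_t sel_eq64(uint64_t a, uint64_t b, uint64_t t, uint64_t f) {
      return (a == b) ? t : f;
    }

    int main(void) {
      printf("%d %d\n", eq64(1, 1), neq64(1, 1));  /* 1 0 */
      printf("%llu\n",
             (unsigned long long) sel_eq64(1, 2, 10, 20));  /* 20 */
      return 0;
    }
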
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 442d491..37a5870 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -34,10 +34,14 @@
inline bool isCondBranch(const MachineInstr *I) {
unsigned opc = I->getOpcode();
- return (opc == SPU::BRNZ
- || opc == SPU::BRZ
- || opc == SPU::BRHNZ
- || opc == SPU::BRHZ);
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
}
}
@@ -103,6 +107,19 @@
return true;
}
break;
+ case SPU::LRr8:
+ case SPU::LRr16:
+ case SPU::LRr32:
+ case SPU::LRf32:
+ case SPU::LRr64:
+ case SPU::LRf64:
+ case SPU::LRr128:
+ case SPU::LRv16i8:
+ case SPU::LRv8i16:
+ case SPU::LRv4i32:
+ case SPU::LRv4f32:
+ case SPU::LRv2i64:
+ case SPU::LRv2f64:
case SPU::ORv16i8_i8:
case SPU::ORv8i16_i16:
case SPU::ORv4i32_i32:
@@ -114,7 +131,18 @@
case SPU::ORi32_v4i32:
case SPU::ORi64_v2i64:
case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
+ case SPU::ORf64_v2f64: {
+ assert(MI.getNumOperands() == 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid SPU OR<type>_<vec> instruction!");
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ sourceReg = MI.getOperand(0).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
case SPU::ORv16i8:
case SPU::ORv8i16:
case SPU::ORv4i32:
@@ -198,18 +226,14 @@
case SPU::STQDr8: {
const MachineOperand MOp1 = MI->getOperand(1);
const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm()
- && (MOp2.isFI()
- || (MOp2.isReg() && MOp2.getReg() == SPU::R1))) {
- if (MOp2.isFI())
- FrameIndex = MOp2.getIndex();
- else
- FrameIndex = MOp1.getImm() / SPUFrameInfo::stackSlotSize();
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
return MI->getOperand(0).getReg();
}
break;
}
- case SPU::STQXv16i8:
+#if 0
+ case SPU::STQXv16i8:
case SPU::STQXv8i16:
case SPU::STQXv4i32:
case SPU::STQXv4f32:
@@ -226,6 +250,7 @@
return MI->getOperand(0).getReg();
}
break;
+#endif
}
return 0;
}
@@ -292,6 +317,8 @@
opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
} else if (RC == SPU::R8CRegisterClass) {
opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
} else {
assert(0 && "Unknown regclass!");
abort();
@@ -366,6 +393,8 @@
opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
} else if (RC == SPU::R8CRegisterClass) {
opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
} else {
assert(0 && "Unknown regclass in loadRegFromStackSlot!");
abort();