AMDGPU: Try to use op_sel when selecting packed instructions
Avoids instructions to pack a vector when the source is really
a scalar being broadcast.
Also be smarter and look for per-component fneg.
Doesn't yet handle scalar from upper half of register
or other swizzles.
llvm-svn: 303291
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7c99752..c3ac796 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1707,10 +1707,38 @@
// FIXME: Look for on separate components
if (Src.getOpcode() == ISD::FNEG) {
- Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
}
+ if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned VecMods = Mods;
+
+ SDValue Lo = Src.getOperand(0);
+ SDValue Hi = Src.getOperand(1);
+
+ if (Lo.getOpcode() == ISD::FNEG) {
+ Lo = Lo.getOperand(0);
+ Mods ^= SISrcMods::NEG;
+ }
+
+ if (Hi.getOpcode() == ISD::FNEG) {
+ Hi = Hi.getOperand(0);
+ Mods ^= SISrcMods::NEG_HI;
+ }
+
+ if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
+ // Really a scalar input. Just select from the low half of the register to
+ // avoid packing.
+
+ Src = Lo;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ Mods = VecMods;
+ }
+
// Packed instructions do not have abs modifiers.
// FIXME: Handle abs/neg of individual components.