Add support for code generation of the one register with immediate form of vorr.
We could be more aggressive about making this work for a larger range of constants,
but this seems like a good start.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118201 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2e4fa32..7a3a747 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -101,6 +101,7 @@
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Custom);
setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -820,6 +821,7 @@
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
}
}
@@ -3431,6 +3433,32 @@
return SDValue();
}
+static SDValue LowerOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op1 = Op.getOperand(1);
+ while (Op1.getOpcode() == ISD::BIT_CONVERT && Op1.getOperand(0) != Op1)
+ Op1 = Op1.getOperand(0);
+ if (Op1.getOpcode() != ARMISD::VMOVIMM) return Op;
+
+ ConstantSDNode* TargetConstant = cast<ConstantSDNode>(Op1.getOperand(0));
+ uint32_t ConstVal = TargetConstant->getZExtValue();
+
+ // FIXME: VORRIMM only supports immediate encodings of 16 and 32 bit size.
+ // In theory for VMOVIMMs whose value is already encoded as with an
+ // 8 bit encoding, we could re-encode it as a 16 or 32 bit immediate.
+ EVT VorrVT = Op1.getValueType();
+ EVT EltVT = VorrVT.getVectorElementType();
+ if (EltVT != MVT::i16 && EltVT != MVT::i32) return Op;
+
+ ConstVal |= 0x0100;
+ SDValue OrConst = DAG.getTargetConstant(ConstVal, MVT::i32);
+
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue toTy = DAG.getNode(ISD::BIT_CONVERT, dl, VorrVT, Op.getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, toTy, OrConst);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vorr);
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -3899,6 +3927,7 @@
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::OR: return LowerOR(Op, DAG);
}
return SDValue();
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 8504b83..4a7dec2 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -162,7 +162,10 @@
FMIN,
// Bit-field insert
- BFI
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM
};
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index cc9fc19..29a1f2c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -69,6 +69,10 @@
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@@ -3295,6 +3299,43 @@
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{11-9} = SIMM{11-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{11-9} = SIMM{11-9};
+}
+
+
// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,