[SystemZ] Add support for IBM z14 processor (3/3)
This adds support for the new 128-bit vector float instructions of z14.
Note that these instructions actually only operate on the f128 type,
since each 128-bit vector register can hold only one 128-bit
float value. However, this is still preferable to the legacy 128-bit
float instructions, since those operate on pairs of floating-point
registers (so we can hold at most 8 values in registers), while the
new instructions use single vector registers (so we can hold up to 32
values in registers).
Adding support includes:
- Enabling the instructions for the assembler/disassembler.
- CodeGen for the instructions. This includes allocating the f128
type now to the VR128BitRegClass instead of FP128BitRegClass.
- Scheduler description support for the instructions.
Note that for a small number of operations, we have no new vector
instructions (like integer <-> 128-bit float conversions), and so
we use the legacy instruction and then reformat the operand
(i.e. copy between a pair of floating-point registers and a
vector register).
llvm-svn: 308196
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td
index ec62eba..fda9c30 100644
--- a/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -217,6 +217,7 @@
"vector-enhancements-1", "VectorEnhancements1",
"Assume that the vector enhancements facility 1 is installed"
>;
+def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;
def FeatureVectorPackedDecimal : SystemZFeature<
"vector-packed-decimal", "VectorPackedDecimal",
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index a806c9b..2d916d2 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -101,7 +101,10 @@
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
}
- addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (Subtarget.hasVectorEnhancements1())
+ addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
+ else
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {
addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
@@ -453,12 +456,24 @@
setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
}
// We have fused multiply-addition for f32 and f64 but not f128.
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
- setOperationAction(ISD::FMA, MVT::f128, Expand);
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::FMA, MVT::f128, Legal);
+ else
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ // We don't have a copysign instruction on vector registers.
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of a f80 constant (in cases where the constant
@@ -466,6 +481,12 @@
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+ // We don't have extending load instruction on vector registers.
+ if (Subtarget.hasVectorEnhancements1()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+ }
+
// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
@@ -530,7 +551,7 @@
case MVT::f64:
return true;
case MVT::f128:
- return false;
+ return Subtarget.hasVectorEnhancements1();
default:
break;
}
@@ -6176,6 +6197,7 @@
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
+ case SystemZ::SelectVR128:
return emitSelect(MI, MBB, 0);
case SystemZ::CondStore8Mux:
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 10172bd..02aeaad 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
// C's ?: operator for floating-point operands.
-def SelectF32 : SelectWrapper<FP32>;
-def SelectF64 : SelectWrapper<FP64>;
-def SelectF128 : SelectWrapper<FP128>;
+def SelectF32 : SelectWrapper<f32, FP32>;
+def SelectF64 : SelectWrapper<f64, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def SelectF128 : SelectWrapper<f128, FP128>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def SelectVR128 : SelectWrapper<f128, VR128>;
defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
@@ -69,8 +72,9 @@
let Predicates = [FeatureVector] in {
defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
- defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
}
+let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in
+ defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
@@ -83,8 +87,12 @@
}
// The sign of an FP128 is in the high register.
-def : Pat<(fcopysign FP32:$src1, FP128:$src2),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -92,8 +100,12 @@
def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
// The sign of an FP128 is in the high register.
-def : Pat<(fcopysign FP64:$src1, FP128:$src2),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
@@ -101,12 +113,14 @@
: Pat<(fcopysign FP128:$src1, cls:$src2),
(INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
-def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- FP32:$src2)>;
-def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- FP64:$src2)>;
-def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP32:$src2)>;
+ def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP64:$src2)>;
+ def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+}
defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
@@ -166,20 +180,32 @@
def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
Requires<[FeatureFPExtension]>;
-def : Pat<(f32 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
-def : Pat<(f64 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f32 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ def : Pat<(f64 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+}
// Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>;
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
+ def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
+}
// Extend memory floating-point values to wider representations.
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
+ (LXEB bdxaddr12only:$src)>;
+ def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
+ (LXDB bdxaddr12only:$src)>;
+}
// Convert a signed integer register value to a floating-point one.
def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
@@ -426,16 +452,18 @@
// f128 multiplication of two FP64 registers.
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
-def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
- (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
- FP64:$src1, subreg_h64), FP64:$src2)>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+ (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
-def : Pat<(fmul (f128 (fpextend FP64:$src1)),
- (f128 (extloadf64 bdxaddr12only:$addr))),
- (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
- bdxaddr12only:$addr)>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fmul (f128 (fpextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
+ bdxaddr12only:$addr)>;
// Fused multiply-add.
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index f36e588..033a0a8 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -4672,10 +4672,10 @@
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
-class SelectWrapper<RegisterOperand cls>
+class SelectWrapper<ValueType vt, RegisterOperand cls>
: Pseudo<(outs cls:$dst),
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
- [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
+ [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
imm32zx4:$valid, imm32zx4:$cc))]> {
let usesCustomInserter = 1;
// Although the instructions used by these nodes do not in themselves
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index eaa694b..4533f4f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -869,6 +869,37 @@
return;
}
+ // Move 128-bit floating-point values between VR128 and FP128.
+ if (SystemZ::VR128BitRegClass.contains(DestReg) &&
+ SystemZ::FP128BitRegClass.contains(SrcReg)) {
+ unsigned SrcRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ unsigned SrcRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
+ .addReg(SrcRegHi, getKillRegState(KillSrc))
+ .addReg(SrcRegLo, getKillRegState(KillSrc));
+ return;
+ }
+ if (SystemZ::FP128BitRegClass.contains(DestReg) &&
+ SystemZ::VR128BitRegClass.contains(SrcReg)) {
+ unsigned DestRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ unsigned DestRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+ if (DestRegHi != SrcReg)
+ copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1);
+ return;
+ }
+
// Everything else needs only one instruction.
unsigned Opcode;
if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 46a7173..f64c0d1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -324,9 +324,9 @@
// Select instructions
//===----------------------------------------------------------------------===//
-def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
+def Select32Mux : SelectWrapper<i32, GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<i32, GR32>;
+def Select64 : SelectWrapper<i64, GR64>;
// We don't define 32-bit Mux stores if we don't have STOCFH, because the
// low-only STOC should then always be used if possible.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 9d8f74c..c9a02d9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -938,6 +938,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
+ def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
}
// Convert from fixed 64-bit.
@@ -973,6 +974,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
+ def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
}
// Load FP integer.
@@ -984,8 +986,10 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+ def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
defm : VectorRounding<VFISB, v128sb>;
defm : VectorRounding<WFISB, v32sb>;
+ defm : VectorRounding<WFIXB, v128xb>;
}
// Load lengthened.
@@ -998,6 +1002,9 @@
def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
}
+ def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
+ def : Pat<(f128 (fpextend (f32 VR32:$src))),
+ (WFLLD (WLDEB VR32:$src))>;
}
// Load rounded.
@@ -1012,6 +1019,10 @@
def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
+ def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
+ def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
+ def : Pat<(f32 (fpround (f128 VR128:$src))),
+ (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
}
// Maximum.
@@ -1029,10 +1040,13 @@
v128sb, v128sb, 2, 0>;
def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
v32sb, v32sb, 2, 8>;
+ def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
+ v128xb, v128xb, 4, 8>;
defm : VectorMax<VFMAXDB, v128db>;
defm : VectorMax<WFMAXDB, v64db>;
defm : VectorMax<VFMAXSB, v128sb>;
defm : VectorMax<WFMAXSB, v32sb>;
+ defm : VectorMax<WFMAXXB, v128xb>;
}
// Minimum.
@@ -1050,10 +1064,13 @@
v128sb, v128sb, 2, 0>;
def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
v32sb, v32sb, 2, 8>;
+ def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
+ v128xb, v128xb, 4, 8>;
defm : VectorMin<VFMINDB, v128db>;
defm : VectorMin<WFMINDB, v64db>;
defm : VectorMin<VFMINSB, v128sb>;
defm : VectorMin<WFMINSB, v32sb>;
+ defm : VectorMin<WFMINXB, v128xb>;
}
// Multiply.
@@ -1063,6 +1080,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
+ def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
}
// Multiply and add.
@@ -1072,6 +1090,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
+ def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
}
// Multiply and subtract.
@@ -1081,6 +1100,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
+ def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
}
// Negative multiply and add.
@@ -1090,6 +1110,7 @@
def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
+ def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
}
// Negative multiply and subtract.
@@ -1099,6 +1120,7 @@
def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
+ def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
}
// Perform sign operation.
@@ -1108,6 +1130,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>;
def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>;
+ def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>;
}
// Load complement.
@@ -1116,6 +1139,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>;
def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>;
+ def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>;
}
// Load negative.
@@ -1124,6 +1148,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>;
def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>;
+ def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>;
}
// Load positive.
@@ -1132,6 +1157,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>;
def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>;
+ def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>;
}
// Square root.
@@ -1141,6 +1167,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
}
// Subtract.
@@ -1150,6 +1177,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
+ def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
}
// Test data class immediate.
@@ -1160,6 +1188,7 @@
let Predicates = [FeatureVectorEnhancements1] in {
def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>;
def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>;
+ def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>;
}
}
}
@@ -1175,6 +1204,7 @@
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
+ def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>;
}
}
@@ -1184,6 +1214,7 @@
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
+ def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>;
}
}
@@ -1198,6 +1229,8 @@
v128f, v128sb, 2, 0>;
defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 8>;
+ defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
}
// Compare and signal equal.
@@ -1210,6 +1243,8 @@
v128f, v128sb, 2, 4>;
defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 12>;
+ defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
}
// Compare high.
@@ -1223,6 +1258,8 @@
v128f, v128sb, 2, 0>;
defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 8>;
+ defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
}
// Compare and signal high.
@@ -1235,6 +1272,8 @@
v128f, v128sb, 2, 4>;
defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 12>;
+ defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
}
// Compare high or equal.
@@ -1248,6 +1287,8 @@
v128f, v128sb, 2, 0>;
defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 8>;
+ defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
}
// Compare and signal high or equal.
@@ -1260,6 +1301,8 @@
v128f, v128sb, 2, 4>;
defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 12>;
+ defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
}
}
@@ -1272,36 +1315,49 @@
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>;
+
+def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>;
//===----------------------------------------------------------------------===//
// Replicating scalars
@@ -1378,6 +1434,20 @@
}
//===----------------------------------------------------------------------===//
+// Support for 128-bit floating-point values in vector registers
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements1] in {
+ def : Pat<(f128 (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+
+ def : Pat<(f128 fpimm0), (VZERO)>;
+ def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>;
+}
+
+//===----------------------------------------------------------------------===//
// String instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index ef5a264..52ba1a5 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -260,10 +260,10 @@
// All vector registers.
defm VR128 : SystemZRegClass<"VR128",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
- (add (sequence "V%u", 0, 7),
- (sequence "V%u", 16, 31),
- (sequence "V%u", 8, 15))>;
+ [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add (sequence "V%u", 0, 7),
+ (sequence "V%u", 16, 31),
+ (sequence "V%u", 8, 15))>;
// Attaches a ValueType to a register operand, to make the instruction
// definitions easier.
@@ -283,6 +283,7 @@
def v128q : TypedReg<v16i8, VR128>;
def v128sb : TypedReg<v4f32, VR128>;
def v128db : TypedReg<v2f64, VR128>;
+def v128xb : TypedReg<f128, VR128>;
def v128any : TypedReg<untyped, VR128>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index f9407eb..f11177a 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -753,7 +753,7 @@
// FP: Select instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[FXa], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>;
def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>;
@@ -1319,18 +1319,23 @@
def : InstRW<[VecBF], (instregex "VFL(L|R)$")>;
def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>;
def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[VecBF2], (instregex "WFLLD$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>;
def : InstRW<[VecBF2], (instregex "VFI$")>;
def : InstRW<[VecBF], (instregex "VFIDB$")>;
def : InstRW<[VecBF], (instregex "WFIDB$")>;
def : InstRW<[VecBF2], (instregex "VFISB$")>;
def : InstRW<[VecBF], (instregex "WFISB$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>;
// Sign operations
def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>;
def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>;
def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>;
// Minimum / maximum
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>;
@@ -1338,11 +1343,13 @@
def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>;
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>;
def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>;
// Test data class
def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>;
// Add / subtract
def : InstRW<[VecBF2], (instregex "VF(A|S)$")>;
@@ -1350,6 +1357,7 @@
def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>;
def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>;
def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>;
// Multiply / multiply-and-add/subtract
def : InstRW<[VecBF2], (instregex "VFM$")>;
@@ -1357,19 +1365,23 @@
def : InstRW<[VecBF], (instregex "WFMDB$")>;
def : InstRW<[VecBF2], (instregex "VFMSB$")>;
def : InstRW<[VecBF], (instregex "WFMSB$")>;
+def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>;
def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>;
def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>;
def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>;
def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>;
def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>;
// Divide / square root
def : InstRW<[VecFPd], (instregex "VFD$")>;
def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>;
+def : InstRW<[VecFPd], (instregex "WFDXB$")>;
def : InstRW<[VecFPd], (instregex "VFSQ$")>;
def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>;
+def : InstRW<[VecFPd], (instregex "WFSQXB$")>;
//===----------------------------------------------------------------------===//
// Vector: Floating-point comparison
@@ -1380,13 +1392,16 @@
def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>;
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>;
def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>;
//===----------------------------------------------------------------------===//
// Vector: Floating-point insertion and extraction