AVX-512: Added all forms of FP compare instructions for KNL and SKX.
Added intrinsics for the instructions. CC parameter of the intrinsics was changed from i8 to i32 according to the spec.
By Igor Breger (igor.breger@intel.com)
llvm-svn: 236714
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 9a85a33..43aa898 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4183,13 +4183,33 @@
}
// Misc.
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
- Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
- Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
+ def int_x86_avx512_mask_cmp_ps_512 :
+ GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_512 :
+ GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_ps_256 :
+ GCCBuiltin<"__builtin_ia32_cmpps256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_256 :
+ GCCBuiltin<"__builtin_ia32_cmppd256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_ps_128 :
+ GCCBuiltin<"__builtin_ia32_cmpps128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_pd_128 :
+ GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_movntdqa :
+ GCCBuiltin<"__builtin_ia32_movntdqa512">,
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7081c74..dc57620 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -210,14 +210,6 @@
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
-
- if (Name == "x86.avx512.mask.cmp.ps.512")
- return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
- NewFn);
- if (Name == "x86.avx512.mask.cmp.pd.512")
- return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
- NewFn);
-
if (Name == "x86.avx512.mask.cmp.b.512")
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
NewFn);
@@ -799,21 +791,6 @@
CI->eraseFromParent();
return;
}
- case Intrinsic::x86_avx512_mask_cmp_ps_512:
- case Intrinsic::x86_avx512_mask_cmp_pd_512: {
- // Need to truncate the last argument from i32 to i8 -- this argument models
- // an inherently 8-bit immediate operand to these x86 instructions.
- SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
-
- // Replace the last argument with a trunc.
- Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
-
- CallInst *NewCall = Builder.CreateCall(NewFn, Args);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
- }
}
}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 93c6ea0..244a94e 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1414,7 +1414,8 @@
X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- consumeToken(); // Eat "{"
+ // Eat "{" and mark the current place.
+ const SMLoc consumedToken = consumeToken();
if (Tok.getIdentifier().startswith("r")){
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
@@ -1436,6 +1437,13 @@
MCConstantExpr::Create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
}
+ if(Tok.getIdentifier().equals("sae")){
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ return X86Operand::CreateToken("{sae}", consumedToken);
+ }
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}
/// ParseIntelMemOperand - Parse intel style memory operand.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 49f0a8a..5e23b5b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14926,12 +14926,27 @@
Mask.getValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
- Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
+ SDValue CC = Op.getOperand(3);
+ CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
+ // We specify 2 possible opcodes for intrinsics with rounding modes.
+ // First, we check if the intrinsic may have non-default rounding mode,
+ // (IntrData->Opc1 != 0), then we check the rounding mode operand.
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC, Rnd);
+ }
+ //default rounding mode
+ if(!Cmp.getNode())
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC);
+
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
+ Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0d24c6a..d2593d5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -308,6 +308,8 @@
/// integer signed and unsigned data types.
CMPM,
CMPMU,
+ // Vector comparison with rounding mode for FP values
+ CMPM_RND,
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 41f4584..bc9ee59 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -305,8 +305,8 @@
Pattern, itin>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
- "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
+ OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
MaskingPattern, itin>, EVEX_K;
}
@@ -335,6 +335,14 @@
(and _.KRCWM:$mask, RHS),
Round, itin>;
+multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm> :
+ AVX512_maskable_custom_cmp<O, F, Outs,
+ Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [],[],"", NoItinerary>;
+
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
@@ -1590,53 +1598,97 @@
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-// avx512_cmp_packed - compare packed instructions
-multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, ValueType vt,
- string suffix, Domain d> {
- def rri : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
- let hasSideEffects = 0 in
- def rrib: AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
- [], d>, EVEX_B;
- def rmi : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [(set KRC:$dst,
- (X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>;
+multiclass avx512_vcmp_common<X86VectorVTInfo _> {
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)>;
+
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>,EVEX_B;
+ }
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
- def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"),
- [], d>, EVEX_B;
- let mayLoad = 1 in
- def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ let mayLoad = 1 in {
+ defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
+ }
+ }
+}
+
+multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
+ // comparison code form (VCMP[EQ/LT/LE/...]
+ defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc,{sae}, $src2, $src1",
+ "$src1, $src2,{sae}, $cc">, EVEX_B;
+ }
+}
+
+multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcmp_common<_.info512>,
+ avx512_vcmp_sae<_.info512>, EVEX_V512;
+
+ }
+ let Predicates = [HasAVX512,HasVLX] in {
+ defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
+ defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
-defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
- "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
- "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
- EVEX_CD8<64, CD8VF>;
+defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
+ AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
+ AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
@@ -1654,30 +1706,7 @@
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
+//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 56043fb..ed480ec 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -747,6 +747,14 @@
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
+class AVX512PSIi8Base : PS {
+ Domain ExeDomain = SSEPackedSingle;
+ ImmType ImmT = Imm8;
+}
+class AVX512PDIi8Base : PD {
+ Domain ExeDomain = SSEPackedDouble;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0bf9d1d..497bdf6 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -147,14 +147,21 @@
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
def X86CmpMaskCC :
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>,
- SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
+def X86CmpMaskCCRound :
+ SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
+ SDTCisInt<4>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 7f6be89..648769e 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -277,18 +277,26 @@
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 59a9c71..d4129e1 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -392,17 +392,17 @@
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
- %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8)
+ %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
ret i16 %res
}
- declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i8, i16, i32)
+ declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
- %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4)
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
ret i8 %res
}
- declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i8, i8, i32)
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 4808ea9..e1f6276 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,36 +1,37 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test1:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test3:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -38,98 +39,120 @@
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test4_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test5:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test6_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: test7:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test7:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test7:
+; SKX: ## BB#0:
+; SKX: vxorps %xmm2, %xmm2, %xmm2
+; SKX: vcmpltps %xmm2, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
ret <4 x float>%c
}
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: test8:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test8:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test8:
+; SKX: ## BB#0:
+; SKX: vxorpd %xmm2, %xmm2, %xmm2
+; SKX: vcmpltpd %xmm2, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
}
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test9:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test9:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
-; CHECK-LABEL: test10:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test10:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
+; SKX-LABEL: test10:
+; SKX: ## BB#0:
+; SKX: vcmpeqps %ymm1, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
ret <8 x float> %max
}
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test11_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test11_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -137,25 +160,25 @@
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
-; CHECK-LABEL: test12:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
-; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
-; CHECK-NEXT: kunpckbw %k0, %k1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test12:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
+; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
+; KNL-NEXT: kunpckbw %k0, %k1, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
+; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
}
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
-; CHECK-LABEL: test13:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test13:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -163,14 +186,14 @@
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test14:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -180,14 +203,14 @@
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test15:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -197,24 +220,24 @@
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test17:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -222,12 +245,12 @@
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test18:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -235,12 +258,12 @@
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test19:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -248,13 +271,13 @@
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test20:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@@ -263,13 +286,13 @@
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test21:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@@ -278,13 +301,13 @@
}
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test22:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
+; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
@@ -294,13 +317,13 @@
}
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test23:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
@@ -310,12 +333,12 @@
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test24:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -325,12 +348,12 @@
}
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test25:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -340,13 +363,13 @@
}
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test26:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@@ -358,13 +381,13 @@
}
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test27:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@@ -375,10 +398,10 @@
ret <8 x i64> %max
}
-; CHECK-LABEL: test28
-; CHECK: vpcmpgtq
-; CHECK: vpcmpgtq
-; CHECK: kxorw
+; KNL-LABEL: test28
+; KNL: vpcmpgtq
+; KNL: vpcmpgtq
+; KNL: kxorw
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
@@ -387,14 +410,188 @@
ret <8 x i32> %resse
}
-; CHECK-LABEL: test29
-; CHECK: vpcmpgtd
-; CHECK: vpcmpgtd
-; CHECK: kxnorw
+; KNL-LABEL: test29
+; KNL: vpcmpgtd
+; KNL: vpcmpgtd
+; KNL: kxnorw
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <16 x i1>%res to <16 x i8>
ret <16 x i8> %resse
-}
\ No newline at end of file
+}
+
+define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
+; SKX-LABEL: test30:
+; SKX: vcmpeqpd %ymm1, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %mask = fcmp oeq <4 x double> %x, %y
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
+ ret <4 x double> %max
+}
+
+define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
+; SKX-LABEL: test31:
+; SKX: vcmpltpd (%rdi), %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %y = load <2 x double>, <2 x double>* %yp, align 4
+ %mask = fcmp olt <2 x double> %x, %y
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
+; SKX-LABEL: test32:
+; SKX: vcmpltpd (%rdi), %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %y = load <4 x double>, <4 x double>* %yp, align 4
+ %mask = fcmp ogt <4 x double> %y, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
+; SKX-LABEL: test33:
+; SKX: vcmpltpd (%rdi), %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+ %y = load <8 x double>, <8 x double>* %yp, align 4
+ %mask = fcmp olt <8 x double> %x, %y
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
+; SKX-LABEL: test34:
+; SKX: vcmpltps (%rdi), %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+ %y = load <4 x float>, <4 x float>* %yp, align 4
+ %mask = fcmp olt <4 x float> %x, %y
+ %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
+; SKX-LABEL: test35:
+; SKX: vcmpltps (%rdi), %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %y = load <8 x float>, <8 x float>* %yp, align 4
+ %mask = fcmp ogt <8 x float> %y, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
+; SKX-LABEL: test36:
+; SKX: vcmpltps (%rdi), %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+ %y = load <16 x float>, <16 x float>* %yp, align 4
+ %mask = fcmp olt <16 x float> %x, %y
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test37:
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask = fcmp ogt <8 x double> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test38:
+; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <4 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
+
+ %mask = fcmp ogt <4 x double> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test39:
+; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+
+ %mask = fcmp ogt <2 x double> %shuffle, %x
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+
+define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test40:
+; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <16 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <16 x float> %shuffle, %x
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test41:
+; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <8 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <8 x float> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test42:
+; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <4 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <4 x float> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
+; SKX-LABEL: test43:
+; SKX: vpmovw2m %xmm2, %k1
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
+ %mask = and <8 x i1> %mask_cmp, %mask_in
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 4db5df5..64ec816 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2261,3 +2261,31 @@
}
declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
+
+define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
+
+define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
+ ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
+
+define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
+ ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+ }
+ declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
diff --git a/llvm/test/MC/Disassembler/X86/avx-512.txt b/llvm/test/MC/Disassembler/X86/avx-512.txt
index cfe5ffd..d618e7e 100644
--- a/llvm/test/MC/Disassembler/X86/avx-512.txt
+++ b/llvm/test/MC/Disassembler/X86/avx-512.txt
@@ -137,5 +137,5 @@
# CHECK: vpcmpd $8, %zmm10, %zmm25, %k5
0x62 0xd3 0x35 0x40 0x1f 0xea 0x8
-# CHECK: vcmppd {sae}, $127, %zmm27, %zmm11, %k4
+# CHECK: vcmppd $127,{sae}, %zmm27, %zmm11, %k4
0x62 0x91 0xa5 0x58 0xc2 0xe3 0x7f
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index fa74005..91107cc 100644
--- a/llvm/test/MC/X86/avx512-encodings.s
+++ b/llvm/test/MC/X86/avx512-encodings.s
@@ -5983,3 +5983,132 @@
// CHECK: vpermilpd
// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x05,0x53,0x10,0x23]
vpermilpd $0x23, 0x400(%rbx), %zmm2
+
+// CHECK: vcmppd $171, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
+ vcmppd $0xab, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $171, %zmm26, %zmm12, %k2 {%k3}
+// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
+ vcmppd $0xab, %zmm26, %zmm12, %k2 {%k3}
+
+// CHECK: vcmppd $171,{sae}, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
+ vcmppd $0xab,{sae}, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
+ vcmppd $0x7b, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123,{sae}, %zmm26, %zmm12, %k2
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
+ vcmppd $0x7b,{sae}, %zmm26, %zmm12, %k2
+
+// CHECK: vcmppd $123, (%rcx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
+ vcmppd $0x7b, (%rcx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %zmm12, %k2
+// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %zmm12, %k2
+
+// CHECK: vcmppd $123, (%rcx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
+ vcmppd $0x7b, (%rcx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, 8128(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
+ vcmppd $0x7b, 8128(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 8192(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmppd $0x7b, 8192(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, -8192(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
+ vcmppd $0x7b, -8192(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, -8256(%rdx), %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmppd $0x7b, -8256(%rdx), %zmm12, %k2
+
+// CHECK: vcmppd $123, 1016(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, 1024(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, -1024(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmppd $123, -1032(%rdx){1to8}, %zmm12, %k2
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to8}, %zmm12, %k2
+
+// CHECK: vcmpps $171, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
+ vcmpps $0xab, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $171, %zmm22, %zmm17, %k2 {%k3}
+// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
+ vcmpps $0xab, %zmm22, %zmm17, %k2 {%k3}
+
+// CHECK: vcmpps $171,{sae}, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
+ vcmpps $0xab,{sae}, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
+ vcmpps $0x7b, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123,{sae}, %zmm22, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
+ vcmpps $0x7b,{sae}, %zmm22, %zmm17, %k2
+
+// CHECK: vcmpps $123, (%rcx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
+ vcmpps $0x7b, (%rcx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %zmm17, %k2
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %zmm17, %k2
+
+// CHECK: vcmpps $123, (%rcx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
+ vcmpps $0x7b, (%rcx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, 8128(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
+ vcmpps $0x7b, 8128(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 8192(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmpps $0x7b, 8192(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, -8192(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
+ vcmpps $0x7b, -8192(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, -8256(%rdx), %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmpps $0x7b, -8256(%rdx), %zmm17, %k2
+
+// CHECK: vcmpps $123, 508(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
+ vcmpps $0x7b, 508(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, 512(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $0x7b, 512(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, -512(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to16}, %zmm17, %k2
+
+// CHECK: vcmpps $123, -516(%rdx){1to16}, %zmm17, %k2
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to16}, %zmm17, %k2
+
diff --git a/llvm/test/MC/X86/intel-syntax-avx512.s b/llvm/test/MC/X86/intel-syntax-avx512.s
index af4e98c..ffdbd20 100644
--- a/llvm/test/MC/X86/intel-syntax-avx512.s
+++ b/llvm/test/MC/X86/intel-syntax-avx512.s
@@ -1,34 +1,175 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -mcpu=knl --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 -mcpu=knl --show-encoding %s | FileCheck %s
-// CHECK: vaddps (%rax), %zmm1, %zmm1
+// CHECK: vaddps zmm1 , zmm1, zmmword ptr [rax]
// CHECK: encoding: [0x62,0xf1,0x74,0x48,0x58,0x08]
vaddps zmm1, zmm1, zmmword ptr [rax]
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
vaddpd zmm1,zmm1,zmm2
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5}
+// CHECK: vaddpd zmm1 {k5}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x4d,0x58,0xca]
vaddpd zmm1{k5},zmm1,zmm2
-// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5} {z}
+// CHECK: vaddpd zmm1 {k5} {z}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0xcd,0x58,0xca]
vaddpd zmm1{k5} {z},zmm1,zmm2
-// CHECK: vaddpd {rn-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rn-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x18,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rn-sae}
-// CHECK: vaddpd {ru-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {ru-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x58,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{ru-sae}
-// CHECK: vaddpd {rd-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rd-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x38,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rd-sae}
-// CHECK: vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
+// CHECK: vaddpd zmm1 , zmm1, zmm2, {rz-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x78,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rz-sae}
+// CHECK: vcmppd k2 , zmm12, zmm26, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
+ vcmppd k2,zmm12,zmm26,0xab
+
+// CHECK: vcmppd k2 {k3}, zmm12, zmm26, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
+ vcmppd k2{k3},zmm12,zmm26,0xab
+
+// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 171
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
+ vcmppd k2,zmm12,zmm26,{sae},0xab
+
+// CHECK: vcmppd k2 , zmm12, zmm26, 123
+// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
+ vcmppd k2 ,zmm12,zmm26,0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 123
+// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
+ vcmppd k2,zmm12,zmm26,{sae},0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rcx],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k2 ,zmm12,zmmword PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rcx]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rcx]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8128], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx+0x1fc0],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx+0x2000],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx-0x2000],0x7b
+
+// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8256], 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmppd k2,zmm12,zmmword PTR [rdx-0x2040],0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1016]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx+0x3f8]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1024]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx+0x400]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1024]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx-0x400]{1to8},0x7b
+
+// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1032]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k2,zmm12,QWORD PTR [rdx-0x408]{1to8},0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmm22, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
+ vcmpps k2,zmm17,zmm22,0xab
+
+// CHECK: vcmpps k2 {k3}, zmm17, zmm22, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
+ vcmpps k2{k3},zmm17,zmm22,0xab
+
+// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 171
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
+ vcmpps k2,zmm17,zmm22,{sae},0xab
+
+// CHECK: vcmpps k2 , zmm17, zmm22, 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
+ vcmpps k2,zmm17,zmm22,0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
+ vcmpps k2,zmm17,zmm22,{sae},0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rcx],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rcx]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rcx]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8128], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx+0x1fc0],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx+0x2000],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8192], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx-0x2000],0x7b
+
+// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8256], 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vcmpps k2,zmm17,zmmword PTR [rdx-0x2040],0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 508]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx+0x1fc]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 512]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx+0x200]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 512]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx-0x200]{1to16},0x7b
+
+// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 516]{1to16}, 123
+// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
new file mode 100644
index 0000000..6de59da
--- /dev/null
+++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
@@ -0,0 +1,225 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcmppd k3 , xmm27, xmm23, 171
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
+ vcmppd k3,xmm27,xmm23,0xab
+
+// CHECK: vcmppd k3 {k5}, xmm27, xmm23, 171
+// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
+ vcmppd k3{k5},xmm27,xmm23,0xab
+
+// CHECK: vcmppd k3 , xmm27, xmm23, 123
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
+ vcmppd k3,xmm27,xmm23,0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rcx]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rcx]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2032], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx+0x7f0],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2048], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx+0x800],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2048], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx-0x800],0x7b
+
+// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2064], 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmppd k3,xmm27,XMMWORD PTR [rdx-0x810],0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1016]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx+0x3f8]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1024]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx+0x400]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1024]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx-0x400]{1to2},0x7b
+
+// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1032]{1to2}, 123
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k3,xmm27,QWORD PTR [rdx-0x408]{1to2},0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymm27, 171
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
+ vcmppd k4,ymm17,ymm27,0xab
+
+// CHECK: vcmppd k4 {k7}, ymm17, ymm27, 171
+// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
+ vcmppd k4{k7},ymm17,ymm27,0xab
+
+// CHECK: vcmppd k4 , ymm17, ymm27, 123
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
+ vcmppd k4,ymm17,ymm27,0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rcx]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rcx]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4064], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx+0xfe0],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4096], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx+0x1000],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4096], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1000],0x7b
+
+// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4128], 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1020],0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1016]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx+0x3f8]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1024]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx+0x400]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1024]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx-0x400]{1to4},0x7b
+
+// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1032]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd k4,ymm17,QWORD PTR [rdx-0x408]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmm28, 171
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
+ vcmpps k4,xmm29,xmm28,0xab
+
+// CHECK: vcmpps k4 {k2}, xmm29, xmm28, 171
+// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
+ vcmpps k4{k2},xmm29,xmm28,0xab
+
+// CHECK: vcmpps k4 , xmm29, xmm28, 123
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
+ vcmpps k4,xmm29,xmm28,0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rcx]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rcx]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2032], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx+0x7f0],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2048], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx+0x800],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2048], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx-0x800],0x7b
+
+// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2064], 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmpps k4,xmm29,XMMWORD PTR [rdx-0x810],0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 508]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx+0x1fc]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 512]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx+0x200]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 512]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx-0x200]{1to4},0x7b
+
+// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 516]{1to4}, 123
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k4,xmm29,DWORD PTR [rdx-0x204]{1to4},0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymm18, 171
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
+ vcmpps k4,ymm19,ymm18,0xab
+
+// CHECK: vcmpps k4 {k1}, ymm19, ymm18, 171
+// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
+ vcmpps k4{k1},ymm19,ymm18,0xab
+
+// CHECK: vcmpps k4 , ymm19, ymm18, 123
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
+ vcmpps k4,ymm19,ymm18,0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rcx], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rcx],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rax+r14*8+0x123],0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rcx]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rcx]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4064], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx+0xfe0],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4096], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx+0x1000],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4096], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1000],0x7b
+
+// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4128], 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1020],0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 508]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx+0x1fc]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 512]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx+0x200]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 512]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx-0x200]{1to8},0x7b
+
+// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 516]{1to8}, 123
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps k4,ymm19,DWORD PTR [rdx-0x204]{1to8},0x7b
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index 837b030..7839f74 100644
--- a/llvm/test/MC/X86/x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s
@@ -9179,3 +9179,237 @@
// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff]
vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
+
+// CHECK: vcmppd $171, %xmm23, %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
+ vcmppd $0xab, %xmm23, %xmm27, %k3
+
+// CHECK: vcmppd $171, %xmm23, %xmm27, %k3 {%k5}
+// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
+ vcmppd $0xab, %xmm23, %xmm27, %k3 {%k5}
+
+// CHECK: vcmppd $123, %xmm23, %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
+ vcmppd $0x7b, %xmm23, %xmm27, %k3
+
+// CHECK: vcmppd $123, (%rcx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
+ vcmppd $0x7b, (%rcx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %xmm27, %k3
+// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %xmm27, %k3
+
+// CHECK: vcmppd $123, (%rcx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
+ vcmppd $0x7b, (%rcx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, 2032(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
+ vcmppd $0x7b, 2032(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 2048(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vcmppd $0x7b, 2048(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, -2048(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
+ vcmppd $0x7b, -2048(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, -2064(%rdx), %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmppd $0x7b, -2064(%rdx), %xmm27, %k3
+
+// CHECK: vcmppd $123, 1016(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, 1024(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, -1024(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $123, -1032(%rdx){1to2}, %xmm27, %k3
+// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to2}, %xmm27, %k3
+
+// CHECK: vcmppd $171, %ymm27, %ymm17, %k4
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
+ vcmppd $0xab, %ymm27, %ymm17, %k4
+
+// CHECK: vcmppd $171, %ymm27, %ymm17, %k4 {%k7}
+// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
+ vcmppd $0xab, %ymm27, %ymm17, %k4 {%k7}
+
+// CHECK: vcmppd $123, %ymm27, %ymm17, %k4
+// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
+ vcmppd $0x7b, %ymm27, %ymm17, %k4
+
+// CHECK: vcmppd $123, (%rcx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
+ vcmppd $0x7b, (%rcx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 291(%rax,%r14,8), %ymm17, %k4
+// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmppd $0x7b, 291(%rax,%r14,8), %ymm17, %k4
+
+// CHECK: vcmppd $123, (%rcx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
+ vcmppd $0x7b, (%rcx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, 4064(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmppd $123, 4064(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 4096(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmppd $0x7b, 4096(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, -4096(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
+ vcmppd $0x7b, -4096(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, -4128(%rdx), %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmppd $0x7b, -4128(%rdx), %ymm17, %k4
+
+// CHECK: vcmppd $123, 1016(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmppd $0x7b, 1016(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, 1024(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vcmppd $0x7b, 1024(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, -1024(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
+ vcmppd $0x7b, -1024(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmppd $123, -1032(%rdx){1to4}, %ymm17, %k4
+// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmppd $0x7b, -1032(%rdx){1to4}, %ymm17, %k4
+
+// CHECK: vcmpps $171, %xmm28, %xmm29, %k4
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
+ vcmpps $0xab, %xmm28, %xmm29, %k4
+
+// CHECK: vcmpps $171, %xmm28, %xmm29, %k4 {%k2}
+// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
+ vcmpps $0xab, %xmm28, %xmm29, %k4 {%k2}
+
+// CHECK: vcmpps $123, %xmm28, %xmm29, %k4
+// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
+ vcmpps $0x7b, %xmm28, %xmm29, %k4
+
+// CHECK: vcmpps $123, (%rcx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %xmm29, %k4
+// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %xmm29, %k4
+
+// CHECK: vcmpps $123, (%rcx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, 2032(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 2032(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vcmpps $0x7b, 2048(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, -2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -2048(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, -2064(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vcmpps $0x7b, -2064(%rdx), %xmm29, %k4
+
+// CHECK: vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
+ vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, -512(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $123, -516(%rdx){1to4}, %xmm29, %k4
+// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vcmpps $171, %ymm18, %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
+ vcmpps $0xab, %ymm18, %ymm19, %k4
+
+// CHECK: vcmpps $171, %ymm18, %ymm19, %k4 {%k1}
+// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
+ vcmpps $0xab, %ymm18, %ymm19, %k4 {%k1}
+
+// CHECK: vcmpps $123, %ymm18, %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
+ vcmpps $0x7b, %ymm18, %ymm19, %k4
+
+// CHECK: vcmpps $123, (%rcx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 291(%rax,%r14,8), %ymm19, %k4
+// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpps $0x7b, 291(%rax,%r14,8), %ymm19, %k4
+
+// CHECK: vcmpps $123, (%rcx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
+ vcmpps $0x7b, (%rcx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, 4064(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 4064(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vcmpps $0x7b, 4096(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, -4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -4096(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, -4128(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vcmpps $0x7b, -4128(%rdx), %ymm19, %k4
+
+// CHECK: vcmpps $123, 508(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
+ vcmpps $0x7b, 508(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, 512(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpps $0x7b, 512(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, -512(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
+ vcmpps $0x7b, -512(%rdx){1to8}, %ymm19, %k4
+
+// CHECK: vcmpps $123, -516(%rdx){1to8}, %ymm19, %k4
+// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpps $0x7b, -516(%rdx){1to8}, %ymm19, %k4
+
+
+
+
+
+
+
+
+
+