[ARM] Make fullfp16 instructions not conditionalisable.

More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.

In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.

(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)

So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.

I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.

Reviewers: SjoerdMeijer, t.p.northover, efriedma

Reviewed By: efriedma

Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D57823

llvm-svn: 354768
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 8776439..00a7202 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1555,6 +1555,8 @@
 
   // Loads & stores operate on both NEON and VFP pipelines.
   let D = VFPNeonDomain;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // VFP Load / store multiple pseudo instructions.
@@ -1902,6 +1904,8 @@
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, unary, non-predicated
@@ -1930,6 +1934,8 @@
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, binary
@@ -1956,6 +1962,8 @@
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, binary, not predicated
@@ -1985,6 +1993,8 @@
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{6}     = opcod3;
   let Inst{4}     = 0;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // VFP conversion instructions
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 8773c16..7871b18 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -129,6 +129,7 @@
   let D = VFPNeonDomain;
 }
 
+let isUnpredicable = 1 in
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
@@ -148,6 +149,7 @@
   let D = VFPNeonDomain;
 }
 
+let isUnpredicable = 1 in
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
@@ -451,7 +453,7 @@
 
 multiclass vsel_inst<string op, bits<2> opc, int CC> {
   let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
-      Uses = [CPSR], AddedComplexity = 4 in {
+      Uses = [CPSR], AddedComplexity = 4, isUnpredicable = 1 in {
     def H : AHbInp<0b11100, opc, 0,
                    (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
@@ -479,7 +481,8 @@
 defm VSELVS : vsel_inst<"vs", 0b01, 6>;
 
 multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
-  let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+  let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+      isUnpredicable = 1 in {
     def H : AHbInp<0b11101, 0b00, opc,
                    (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
@@ -947,7 +950,8 @@
 
 multiclass vrint_inst_anpm<string opc, bits<2> rm,
                            SDPatternOperator node = null_frag> {
-  let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+  let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
+      isUnpredicable = 1 in {
     def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
@@ -1012,7 +1016,7 @@
                   IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
 } // isMoveReg
 
-let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
 def VMOVH  : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
                   IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
@@ -1221,6 +1225,8 @@
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  let isUnpredicable = 1;
 }
 
 // Move R->H, clearing top 16 bits
@@ -1241,6 +1247,8 @@
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  let isUnpredicable = 1;
 }
 
 // FMRDH: SPR -> GPR
@@ -1347,6 +1355,7 @@
                                []>,
              Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // s32
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)),
@@ -1392,6 +1401,7 @@
                                 []>,
              Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // u32
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)),
@@ -1496,6 +1506,7 @@
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // Z bit
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(i32 (fp_to_sint HPR:$a)),
@@ -1542,6 +1553,7 @@
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // Z bit
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(i32 (fp_to_uint HPR:$a)),
@@ -1571,6 +1583,7 @@
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // Z bit
+  let isUnpredicable = 1;
 }
 
 def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1595,6 +1608,7 @@
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // Z bit
+  let isUnpredicable = 1;
 }
 }
 
@@ -1642,6 +1656,8 @@
   let Predicates = [HasVFP2, HasDPVFP];
 }
 
+let isUnpredicable = 1 in {
+
 def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
@@ -1666,6 +1682,8 @@
              Requires<[HasFullFP16]>,
              Sched<[WriteFPCVT]>;
 
+} // End of 'let isUnpredicable = 1 in'
+
 def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
@@ -1721,6 +1739,8 @@
 
 // Fixed-Point to FP:
 
+let isUnpredicable = 1 in {
+
 def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
@@ -1745,6 +1765,8 @@
              Requires<[HasFullFP16]>,
              Sched<[WriteFPCVT]>;
 
+} // End of 'let isUnpredicable = 1 in'
+
 def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
@@ -2370,6 +2392,8 @@
   let Inst{11-8}  = 0b1001;     // Half precision
   let Inst{7-4}   = 0b0000;
   let Inst{3-0}   = imm{3-0};
+
+  let isUnpredicable = 1;
 }
 }
 
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 5682c2a..ef7f0be 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6478,6 +6478,18 @@
              Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
                  ARMCC::AL) {
     return Warning(Loc, "predicated instructions should be in IT block");
+  } else if (!MCID.isPredicable()) {
+    // Check the instruction doesn't have a predicate operand anyway
+    // that it's not allowed to use. Sometimes this happens in order
+    // to keep instructions the same shape even though one cannot
+    // legally be predicated, e.g. vmul.f16 vs vmul.f32.
+    for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+      if (MCID.OpInfo[i].isPredicate()) {
+        if (Inst.getOperand(i).getImm() != ARMCC::AL)
+          return Error(Loc, "instruction is not predicable");
+        break;
+      }
+    }
   }
 
   // PC-setting instructions in an IT block, but not the last instruction of
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 1ac1ade..b65a075 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -119,7 +119,7 @@
   mutable ITStatus ITBlock;
 
   DecodeStatus AddThumbPredicate(MCInst&) const;
-  void UpdateThumbVFPPredicate(MCInst&) const;
+  void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
 };
 
 } // end anonymous namespace
@@ -630,6 +630,8 @@
   for (unsigned i = 0; i < NumOps; ++i, ++I) {
     if (I == MI.end()) break;
     if (OpInfo[i].isPredicate()) {
+      if (CC != ARMCC::AL && !ARMInsts[MI.getOpcode()].isPredicable())
+        Check(S, SoftFail);
       I = MI.insert(I, MCOperand::createImm(CC));
       ++I;
       if (CC == ARMCC::AL)
@@ -655,7 +657,8 @@
 // mode, the auto-generated decoder will give them an (incorrect)
 // predicate operand.  We need to rewrite these operands based on the IT
 // context as a post-pass.
-void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+void ThumbDisassembler::UpdateThumbVFPPredicate(
+  DecodeStatus &S, MCInst &MI) const {
   unsigned CC;
   CC = ITBlock.getITCC();
   if (CC == 0xF)
@@ -668,6 +671,8 @@
   unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
   for (unsigned i = 0; i < NumOps; ++i, ++I) {
     if (OpInfo[i].isPredicate() ) {
+      if (CC != ARMCC::AL && !ARMInsts[MI.getOpcode()].isPredicable())
+        Check(S, SoftFail);
       I->setImm(CC);
       ++I;
       if (CC == ARMCC::AL)
@@ -773,7 +778,7 @@
         decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
     if (Result != MCDisassembler::Fail) {
       Size = 4;
-      UpdateThumbVFPPredicate(MI);
+      UpdateThumbVFPPredicate(Result, MI);
       return Result;
     }
   }
@@ -1110,16 +1115,19 @@
 
 static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
   if (Val == 0xF) return MCDisassembler::Fail;
   // AL predicate is not allowed on Thumb1 branches.
   if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
     return MCDisassembler::Fail;
+  if (Val != ARMCC::AL && !ARMInsts[Inst.getOpcode()].isPredicable())
+    Check(S, MCDisassembler::SoftFail);
   Inst.addOperand(MCOperand::createImm(Val));
   if (Val == ARMCC::AL) {
     Inst.addOperand(MCOperand::createReg(0));
   } else
     Inst.addOperand(MCOperand::createReg(ARM::CPSR));
-  return MCDisassembler::Success;
+  return S;
 }
 
 static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,