[BPF] add code-gen support for JMP32 instructions

JMP32 instructions has been added to eBPF ISA. They are 32-bit variants of
existing BPF conditional jump instructions, but the comparison happens on
low 32-bit sub-register only, therefore some unnecessary extensions could
be saved.

JMP32 instructions will only be available for -mcpu=v3. Host probe hook has
been updated accordingly.

JMP32 instructions will only be enabled in code-gen when -mattr=+alu32
enabled, meaning compiling the program using sub-register mode.

For JMP32 encoding, it is a new instruction class, and is using the
reserved eBPF class number 0x6.

This patch has been tested by compiling and running kernel bpf selftests
with JMP32 enabled.

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
llvm-svn: 353384
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index c81b1f1..1b907bc 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -330,7 +330,19 @@
 #if !defined(__linux__) || !defined(__x86_64__)
   return "generic";
 #else
-  uint8_t insns[40] __attribute__ ((aligned (8))) =
+  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
+      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
+    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
+      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
+      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
+      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
+      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+      /* BPF_EXIT_INSN() */
+      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
@@ -355,10 +367,23 @@
   } attr = {};
   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
   attr.insn_cnt = 5;
-  attr.insns = (uint64_t)insns;
+  attr.insns = (uint64_t)v3_insns;
   attr.license = (uint64_t)"DUMMY";
 
-  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
+  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
+                   sizeof(attr));
+  if (fd >= 0) {
+    close(fd);
+    return "v3";
+  }
+
+  /* Clear the whole attr in case its content changed by syscall. */
+  memset(&attr, 0, sizeof(attr));
+  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
+  attr.insn_cnt = 5;
+  attr.insns = (uint64_t)v2_insns;
+  attr.license = (uint64_t)"DUMMY";
+  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
   if (fd >= 0) {
     close(fd);
     return "v2";
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index dd97698..fad966f 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -20,6 +20,7 @@
 def : Proc<"generic", []>;
 def : Proc<"v1", []>;
 def : Proc<"v2", []>;
+def : Proc<"v3", []>;
 def : Proc<"probe", []>;
 
 def DummyFeature : SubtargetFeature<"dummy", "isDummyMode",
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index e37df6e..ff69941 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -105,7 +105,8 @@
 
   if (STI.getHasAlu32()) {
     setOperationAction(ISD::BSWAP, MVT::i32, Promote);
-    setOperationAction(ISD::BR_CC, MVT::i32, Promote);
+    setOperationAction(ISD::BR_CC, MVT::i32,
+                       STI.getHasJmp32() ? Custom : Promote);
   }
 
   setOperationAction(ISD::CTTZ, MVT::i64, Custom);
@@ -162,6 +163,7 @@
 
   // CPU/Feature control
   HasAlu32 = STI.getHasAlu32();
+  HasJmp32 = STI.getHasJmp32();
   HasJmpExt = STI.getHasJmpExt();
 }
 
@@ -506,7 +508,7 @@
     NegateCC(LHS, RHS, CC);
 
   return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
-                     DAG.getConstant(CC, DL, MVT::i64), Dest);
+                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
 }
 
 SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -676,36 +678,23 @@
   int CC = MI.getOperand(3).getImm();
   int NewCC;
   switch (CC) {
-  case ISD::SETGT:
-    NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri;
-    break;
-  case ISD::SETUGT:
-    NewCC = isSelectRROp ? BPF::JUGT_rr : BPF::JUGT_ri;
-    break;
-  case ISD::SETGE:
-    NewCC = isSelectRROp ? BPF::JSGE_rr : BPF::JSGE_ri;
-    break;
-  case ISD::SETUGE:
-    NewCC = isSelectRROp ? BPF::JUGE_rr : BPF::JUGE_ri;
-    break;
-  case ISD::SETEQ:
-    NewCC = isSelectRROp ? BPF::JEQ_rr : BPF::JEQ_ri;
-    break;
-  case ISD::SETNE:
-    NewCC = isSelectRROp ? BPF::JNE_rr : BPF::JNE_ri;
-    break;
-  case ISD::SETLT:
-    NewCC = isSelectRROp ? BPF::JSLT_rr : BPF::JSLT_ri;
-    break;
-  case ISD::SETULT:
-    NewCC = isSelectRROp ? BPF::JULT_rr : BPF::JULT_ri;
-    break;
-  case ISD::SETLE:
-    NewCC = isSelectRROp ? BPF::JSLE_rr : BPF::JSLE_ri;
-    break;
-  case ISD::SETULE:
-    NewCC = isSelectRROp ? BPF::JULE_rr : BPF::JULE_ri;
-    break;
+#define SET_NEWCC(X, Y) \
+  case ISD::X: \
+    if (is32BitCmp && HasJmp32) \
+      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
+    else \
+      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
+    break
+  SET_NEWCC(SETGT, JSGT);
+  SET_NEWCC(SETUGT, JUGT);
+  SET_NEWCC(SETGE, JSGE);
+  SET_NEWCC(SETUGE, JUGE);
+  SET_NEWCC(SETEQ, JEQ);
+  SET_NEWCC(SETNE, JNE);
+  SET_NEWCC(SETLT, JSLT);
+  SET_NEWCC(SETULT, JULT);
+  SET_NEWCC(SETLE, JSLE);
+  SET_NEWCC(SETULE, JULE);
   default:
     report_fatal_error("unimplemented select CondCode " + Twine(CC));
   }
@@ -723,13 +712,13 @@
   //
   // We simply do extension for all situations in this method, but we will
   // try to remove those unnecessary in BPFMIPeephole pass.
-  if (is32BitCmp)
+  if (is32BitCmp && !HasJmp32)
     LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
 
   if (isSelectRROp) {
     unsigned RHS = MI.getOperand(2).getReg();
 
-    if (is32BitCmp)
+    if (is32BitCmp && !HasJmp32)
       RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
 
     BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index cafc6d4..2a44165 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -55,6 +55,7 @@
                               MachineBasicBlock *BB) const override;
 
   bool getHasAlu32() const { return HasAlu32; }
+  bool getHasJmp32() const { return HasJmp32; }
   bool getHasJmpExt() const { return HasJmpExt; }
 
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -65,6 +66,7 @@
 private:
   // Control Instruction Selection Features
   bool HasAlu32;
+  bool HasJmp32;
   bool HasJmpExt;
 
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index c5e712f..9f00dc8 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -16,6 +16,7 @@
 def BPF_STX   : BPFOpClass<0x3>;
 def BPF_ALU   : BPFOpClass<0x4>;
 def BPF_JMP   : BPFOpClass<0x5>;
+def BPF_JMP32 : BPFOpClass<0x6>;
 def BPF_ALU64 : BPFOpClass<0x7>;
 
 class BPFSrcType<bits<1> val> {
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 4c4b214..36724b8 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -101,6 +101,26 @@
                          [{return (N->getZExtValue() == ISD::SETULT);}]>;
 def BPF_CC_LEU : PatLeaf<(i64 imm),
                          [{return (N->getZExtValue() == ISD::SETULE);}]>;
+def BPF_CC_EQ_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETEQ);}]>;
+def BPF_CC_NE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETNE);}]>;
+def BPF_CC_GE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETGE);}]>;
+def BPF_CC_GT_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETGT);}]>;
+def BPF_CC_GTU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETUGT);}]>;
+def BPF_CC_GEU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETUGE);}]>;
+def BPF_CC_LE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETLE);}]>;
+def BPF_CC_LT_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETLT);}]>;
+def BPF_CC_LTU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETULT);}]>;
+def BPF_CC_LEU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETULE);}]>;
 
 // For arithmetic and jump instructions the 8-bit 'code'
 // field is divided into three parts:
@@ -166,23 +186,57 @@
   let BPFClass = BPF_JMP;
 }
 
-multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond> {
+class JMP_RR_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+    : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+                   (outs),
+                   (ins GPR32:$dst, GPR32:$src, brtarget:$BrDst),
+                   "if $dst "#OpcodeStr#" $src goto $BrDst",
+                   [(BPFbrcc i32:$dst, i32:$src, Cond, bb:$BrDst)]> {
+  bits<4> dst;
+  bits<4> src;
+  bits<16> BrDst;
+
+  let Inst{55-52} = src;
+  let Inst{51-48} = dst;
+  let Inst{47-32} = BrDst;
+  let BPFClass = BPF_JMP32;
+}
+
+class JMP_RI_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+    : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+                   (outs),
+                   (ins GPR32:$dst, i32imm:$imm, brtarget:$BrDst),
+                   "if $dst "#OpcodeStr#" $imm goto $BrDst",
+                   [(BPFbrcc i32:$dst, i32immSExt32:$imm, Cond, bb:$BrDst)]> {
+  bits<4> dst;
+  bits<16> BrDst;
+  bits<32> imm;
+
+  let Inst{51-48} = dst;
+  let Inst{47-32} = BrDst;
+  let Inst{31-0} = imm;
+  let BPFClass = BPF_JMP32;
+}
+
+multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond, PatLeaf Cond32> {
   def _rr : JMP_RR<Opc, OpcodeStr, Cond>;
   def _ri : JMP_RI<Opc, OpcodeStr, Cond>;
+  def _rr_32 : JMP_RR_32<Opc, OpcodeStr, Cond32>;
+  def _ri_32 : JMP_RI_32<Opc, OpcodeStr, Cond32>;
 }
 
 let isBranch = 1, isTerminator = 1, hasDelaySlot=0 in {
 // cmp+goto instructions
-defm JEQ  : J<BPF_JEQ, "==",  BPF_CC_EQ>;
-defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU>;
-defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU>;
-defm JNE  : J<BPF_JNE, "!=",  BPF_CC_NE>;
-defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT>;
-defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE>;
-defm JULT : J<BPF_JLT, "<", BPF_CC_LTU>;
-defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU>;
-defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT>;
-defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE>;
+defm JEQ  : J<BPF_JEQ, "==",  BPF_CC_EQ, BPF_CC_EQ_32>;
+defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU, BPF_CC_GTU_32>;
+defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU, BPF_CC_GEU_32>;
+defm JNE  : J<BPF_JNE, "!=",  BPF_CC_NE, BPF_CC_NE_32>;
+defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT, BPF_CC_GT_32>;
+defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE, BPF_CC_GE_32>;
+defm JULT : J<BPF_JLT, "<", BPF_CC_LTU, BPF_CC_LTU_32>;
+defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU, BPF_CC_LEU_32>;
+defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
+defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 }
 
 // ALU instructions
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 626f0c5..ab34525 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -35,6 +35,7 @@
 
 void BPFSubtarget::initializeEnvironment() {
   HasJmpExt = false;
+  HasJmp32 = false;
   HasAlu32 = false;
   UseDwarfRIS = false;
 }
@@ -48,6 +49,11 @@
     HasJmpExt = true;
     return;
   }
+  if (CPU == "v3") {
+    HasJmpExt = true;
+    HasJmp32 = true;
+    return;
+  }
 }
 
 BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index a9b8966..3da6a02 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -47,6 +47,10 @@
   // whether the cpu supports jmp ext
   bool HasJmpExt;
 
+  // whether the cpu supports jmp32 ext.
+  // NOTE: jmp32 is not enabled when alu32 enabled.
+  bool HasJmp32;
+
   // whether the cpu supports alu32 instructions.
   bool HasAlu32;
 
@@ -65,6 +69,7 @@
   // subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   bool getHasJmpExt() const { return HasJmpExt; }
+  bool getHasJmp32() const { return HasJmp32; }
   bool getHasAlu32() const { return HasAlu32; }
   bool getUseDwarfRIS() const { return UseDwarfRIS; }
 
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 43de8b6..ed09e44 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -39,7 +39,7 @@
     BPF_STX = 0x3,
     BPF_ALU = 0x4,
     BPF_JMP = 0x5,
-    BPF_RES = 0x6,
+    BPF_JMP32 = 0x6,
     BPF_ALU64 = 0x7
   };