Add implicit null and stack checks for x86

This adds compiler and runtime changes for x86
implicit checks.  32 bit only.

Both host and target are supported.
By default, on the host, the implicit checks are null pointer and
stack overflow.  Suspend is implemented but not switched on.

Change-Id: I88a609e98d6bf32f283eaa4e6ec8bbf8dc1df78a
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 8df5b6d..ebe3f0a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -271,21 +271,22 @@
   { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
   { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
   { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
   { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
   { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
+  { kX86Test32RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!1d]" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..916198d 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -222,15 +222,27 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
 
-  /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
-
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-      !IsLargeFrame(frame_size_, cu_->target64 ? kX86_64 : kX86);
+  InstructionSet isa =  cu_->target64 ? kX86_64 : kX86;
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
+
+  // If we doing an implicit stack overflow check, perform the load immediately
+  // before the stack pointer is decremented and anything is saved.
+  if (!skip_overflow_check && !Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // Implicit stack overflow check.
+    // test eax,[esp + -overflow]
+    int overflow = GetStackOverflowReservedBytes(isa);
+    NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rX86_SP.GetReg(), -overflow);
+    MarkPossibleStackOverflowException();
+  }
+
+  /* Build frame, return address already on stack */
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ -
+                              GetInstructionSetPointerSize(cu_->instruction_set));
+
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -261,25 +273,27 @@
      private:
       const size_t sp_displace_;
     };
-    // TODO: for large frames we should do something like:
-    // spill ebp
-    // lea ebp, [esp + frame_size]
-    // cmp ebp, fs:[stack_end_]
-    // jcc stack_overflow_exception
-    // mov esp, ebp
-    // in case a signal comes in that's not using an alternate signal stack and the large frame may
-    // have moved us outside of the reserved area at the end of the stack.
-    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    if (cu_->target64) {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
-    } else {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
-    }
-    LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      // TODO: for large frames we should do something like:
+      // spill ebp
+      // lea ebp, [esp + frame_size]
+      // cmp ebp, fs:[stack_end_]
+      // jcc stack_overflow_exception
+      // mov esp, ebp
+      // in case a signal comes in that's not using an alternate signal stack and the large frame
+      // may have moved us outside of the reserved area at the end of the stack.
+      // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+      if (cu_->target64) {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
+      } else {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
+      }
+      LIR* branch = OpCondBranch(kCondUlt, nullptr);
+      AddSlowPath(
         new(arena_)StackOverflowSlowPath(this, branch,
                                          frame_size_ -
                                          GetInstructionSetPointerSize(cu_->instruction_set)));
+    }
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -318,4 +332,14 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  // Implicit null pointer check.
+  // test eax,[arg1+0]
+  NewLIR3(kX86Test32RM, rs_rAX.GetReg(), reg.GetReg(), 0);
+  MarkPossibleNullPointerException(opt_flags);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..7e77364 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -85,6 +85,7 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size) OVERRIDE;
   void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  void GenImplicitNullCheck(RegStorage reg, int opt_flags);
 
   // Required for target - register utilities.
   RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
@@ -796,9 +797,11 @@
    * @param base_reg The register holding the base address.
    * @param offset The offset from the base.
    * @param check_value The immediate to compare to.
+   * @param target branch target (or nullptr)
+   * @param compare output for getting LIR for comparison (or nullptr)
    */
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target);
+                         int offset, int check_value, LIR* target, LIR** compare);
 
   /*
    * Can this operation be using core registers without temporaries?
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f1166f6..e8118dc 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1092,6 +1092,7 @@
   };
 
   OpRegMem(kOpCmp, index, array_base, len_offset);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondUge, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
@@ -1132,6 +1133,7 @@
   };
 
   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondLs, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..6731b38 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -825,8 +825,10 @@
 }
 
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
-  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
-  return nullptr;
+  // First load the pointer in fs:[suspend-trigger] into eax
+  // Then use a test instruction to indirect via that address.
+  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),  Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
 }
 
 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
@@ -1189,6 +1191,7 @@
   // Is the string non-NULL?
   LoadValueDirectFixed(rl_obj, rs_rDX);
   GenNullCheck(rs_rDX, info->opt_flags);
+  // uint32_t opt_flags = info->opt_flags;
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
 
   // Does the character fit in 16 bits?
@@ -1215,12 +1218,20 @@
   // Character is in EAX.
   // Object pointer is in EDX.
 
+  // Compute the number of words to search in to rCX.
+  Load32Disp(rs_rDX, count_offset, rs_rCX);
+
+  // Possible signal here due to null pointer dereference.
+  // Note that the signal handler will expect the top word of
+  // the stack to be the ArtMethod*.  If the PUSH edi instruction
+  // below is ahead of the load above then this will not be true
+  // and the signal handler will not work.
+  MarkPossibleNullPointerException(0);
+
   // We need to preserve EDI, but have no spare registers, so push it on the stack.
   // We have to remember that all stack addresses after this are offset by sizeof(EDI).
   NewLIR1(kX86Push32R, rs_rDI.GetReg());
 
-  // Compute the number of words to search in to rCX.
-  Load32Disp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
   bool is_index_on_stack = false;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160f..09fce91 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -684,9 +684,9 @@
     } else {
       DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
       if (r_base == r_dest.GetLow()) {
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
+        load = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
-        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load2 = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
       } else {
         load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
         load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
@@ -712,16 +712,16 @@
         if (r_dest.GetHigh() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
-          load2 = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
           OpRegCopy(r_dest.GetHigh(), temp);
           FreeTemp(temp);
         } else {
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
         }
       } else {
@@ -744,6 +744,7 @@
     }
   }
 
+  // Always return first load generated as this might cause a fault if base is nullptr.
   return load;
 }
 
@@ -881,9 +882,12 @@
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target) {
-    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(), offset,
-            check_value);
+                                   int offset, int check_value, LIR* target, LIR** compare) {
+    LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
+            offset, check_value);
+    if (compare != nullptr) {
+        *compare = inst;
+    }
     LIR* branch = OpCondBranch(cond, target);
     return branch;
 }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..1ec3d41 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -497,6 +497,7 @@
   UnaryOpcode(kX86Test, RI, MI, AI),
   kX86Test32RR,
   kX86Test64RR,
+  kX86Test32RM,
   UnaryOpcode(kX86Not, R, M, A),
   UnaryOpcode(kX86Neg, R, M, A),
   UnaryOpcode(kX86Mul,  DaR, DaM, DaA),