Instruction scheduling for ARM.

Performance improvements on various benchmarks with this CL:

benchmarks     improvements
---------------------------
algorithm                1%
benchmarksgame           2%
caffeinemark             2%
math                     3%
stanford                 4%

Tested on an ARM Cortex-A53 CPU.

The code size impact is negligible.

Test: m test-art-host
Test: m test-art-target
Change-Id: I314c90c09ce27e3d224fc686ef73c7d94a6b5a2c
diff --git a/compiler/Android.bp b/compiler/Android.bp
index a2b07af..df896dc 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -115,6 +115,7 @@
                 "optimizing/intrinsics_arm.cc",
                 "optimizing/intrinsics_arm_vixl.cc",
                 "optimizing/nodes_shared.cc",
+                "optimizing/scheduler_arm.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 065c11e..f928f71 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -638,11 +638,14 @@
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
       GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
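+      // Scheduling is appended last so that it runs on the graph produced by all other
+      // ARM-specific optimizations.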
+      HInstructionScheduling* scheduling =
+          new (arena) HInstructionScheduling(graph, instruction_set, codegen);
       HOptimization* arm_optimizations[] = {
         simplifier,
         side_effects,
         gvn,
-        fixups
+        fixups,
+        scheduling,
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index d65d20c..ac0ccf5 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -23,6 +23,10 @@
 #include "scheduler_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
 namespace art {
 
 void SchedulingGraph::AddDependency(SchedulingNode* node,
@@ -580,28 +584,39 @@
 
 void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                  bool schedule_randomly) {
+#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
+  // Phase-local allocator for the scheduler's internal data structures (scheduling nodes,
+  // the internal node map, dependencies, etc.), shared by the per-backend schedulers below.
+  ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  RandomSchedulingNodeSelector random_selector;
+  SchedulingNodeSelector* selector = schedule_randomly
+      ? static_cast<SchedulingNodeSelector*>(&random_selector)
+      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+#else
   // Avoid compilation error when compiling for unsupported instruction set.
   UNUSED(only_optimize_loop_blocks);
   UNUSED(schedule_randomly);
+#endif
   switch (instruction_set_) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
-      // Phase-local allocator that allocates scheduler internal data structures like
-      // scheduling nodes, internel nodes map, dependencies, etc.
-      ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
-
-      CriticalPathSchedulingNodeSelector critical_path_selector;
-      RandomSchedulingNodeSelector random_selector;
-      SchedulingNodeSelector* selector = schedule_randomly
-          ? static_cast<SchedulingNodeSelector*>(&random_selector)
-          : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
-
       arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
       scheduler.Schedule(graph_);
       break;
     }
 #endif
+#if defined(ART_ENABLE_CODEGEN_arm)
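+    // Thumb2 and ARM code generation share the same scheduler and latency model.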
+    case kThumb2:
+    case kArm: {
+      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
+      arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor);
+      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+      scheduler.Schedule(graph_);
+      break;
+    }
+#endif
     default:
       break;
   }
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index 9236a0e..73e8087 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,6 +23,7 @@
 #include "driver/compiler_driver.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "code_generator.h"
 
 namespace art {
 
@@ -469,8 +470,9 @@
 
 class HInstructionScheduling : public HOptimization {
  public:
-  HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
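+  // `cg` may be null; it is only needed by back ends whose latency visitors query the
+  // code generator (currently the ARM back end).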
+  HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr)
       : HOptimization(graph, kInstructionScheduling),
+        codegen_(cg),
         instruction_set_(instruction_set) {}
 
   void Run() {
@@ -480,6 +482,7 @@
 
   static constexpr const char* kInstructionScheduling = "scheduler";
 
+  CodeGenerator* const codegen_;
   const InstructionSet instruction_set_;
 
  private:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
new file mode 100644
index 0000000..1a89567
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "mirror/array-inl.h"
+#include "scheduler_arm.h"
+
+namespace art {
+namespace arm {
+
+using helpers::Int32ConstantFrom;
+using helpers::Uint64ConstantFrom;
+
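+// A note on the two latency fields inherited from SchedulingLatencyVisitor: when an
+// HInstruction expands to several machine instructions, `last_visited_internal_latency_`
+// models the cost of the internal instructions of the expansion, while
+// `last_visited_latency_` is the latency between the last generated instruction and its
+// users.
+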
+void SchedulingLatencyVisitorARM::HandleBinaryOperationLatencies(HBinaryOperation* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
+      // so a bubble (kArmNopLatency) is added to represent the internal carry flag
+      // dependency inside these pairs.
+      last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
+  HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
+  HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmMulFloatingPointLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmMulIntegerLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleBitwiseOperationLatencies(HBinaryOperation* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = kArmIntegerOpLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimInt:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong: {
+      // Mirrors the code generated by HandleLongRotate() in the code generator.
+      HInstruction* rhs = instr->GetRight();
+      if (rhs->IsConstant()) {
+        uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+        if (rot != 0u) {
+          last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+        } else {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+        }
+      } else {
+        last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
+        last_visited_latency_ = kArmBranchLatency;
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
+  Primitive::Type type = instr->GetResultType();
+  HInstruction* rhs = instr->GetRight();
+  switch (type) {
+    case Primitive::kPrimInt:
+      if (!rhs->IsConstant()) {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+      }
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong:
+      if (!rhs->IsConstant()) {
+        last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
+      } else {
+        uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+        if (shift_value == 1 || shift_value >= 32) {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+        } else {
+          last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+        }
+      }
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+  switch (instr->GetLeft()->GetType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+  }
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
+  Primitive::Type type = instr->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+  }
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
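+// Models the cost of a single data-processing instruction of an HDataProcWithShifterOp
+// expansion; `internal_latency` selects whether it is an internal instruction of the
+// expansion or the final one producing the visible result.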
+void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
+  if (internal_latency) {
+    last_visited_internal_latency_ += kArmIntegerOpLatency;
+  } else {
+    last_visited_latency_ = kArmDataProcWithShifterOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  if (kind == HInstruction::kAdd) {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else if (kind == HInstruction::kSub) {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    HandleGenerateDataProcInstruction(/* internal_latency */ true);
+    HandleGenerateDataProcInstruction();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+  const uint32_t shift_value = instruction->GetShiftAmount();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+
+  if (shift_value >= 32) {
+    // The different shift types generate similar code here, so unlike the codegen pass we
+    // do not need to distinguish between them; this also avoids depending on the shift-type
+    // handling of the different ARM backends.
+    HandleGenerateDataProc(instruction);
+  } else {
+    DCHECK_GT(shift_value, 1U);
+    DCHECK_LT(shift_value, 32U);
+
+    if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+      HandleGenerateDataProcInstruction(/* internal_latency */ true);
+      HandleGenerateDataProcInstruction(/* internal_latency */ true);
+      HandleGenerateDataProcInstruction();
+    } else {
+      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+      HandleGenerateDataProc(instruction);
+    }
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
+  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+  if (instruction->GetType() == Primitive::kPrimInt) {
+    DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+    HandleGenerateDataProcInstruction();
+  } else {
+    DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+      HandleGenerateDataProc(instruction);
+    } else {
+      HandleGenerateLongDataProc(instruction);
+    }
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+  // Although the generated code is only a simple `add` instruction, we found empirically
+  // that scheduling it away from its memory-access users is beneficial.
+  last_visited_internal_latency_ = kArmNopLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArmMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  const bool maybe_compressed_char_at =
+      mirror::kUseStringCompression && instruction->IsStringCharAt();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  HInstruction* index = instruction->InputAt(1);
+
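+  // String.charAt() on a potentially compressed string needs an extra load of the count
+  // field and a branch on the compression flag, which the latencies below account for.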
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (maybe_compressed_char_at) {
+        last_visited_internal_latency_ += kArmMemoryLoadLatency;
+      }
+      if (index->IsConstant()) {
+        if (maybe_compressed_char_at) {
+          last_visited_internal_latency_ +=
+              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+          last_visited_latency_ = kArmBranchLatency;
+        } else {
+          last_visited_latency_ += kArmMemoryLoadLatency;
+        }
+      } else {
+        if (!has_intermediate_address) {
+          last_visited_internal_latency_ += kArmIntegerOpLatency;
+        }
+        if (maybe_compressed_char_at) {
+          last_visited_internal_latency_ +=
+              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+          last_visited_latency_ = kArmBranchLatency;
+        } else {
+          last_visited_latency_ += kArmMemoryLoadLatency;
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
+      } else {
+        if (index->IsConstant()) {
+          last_visited_latency_ = kArmMemoryLoadLatency;
+        } else {
+          if (!has_intermediate_address) {
+            last_visited_internal_latency_ += kArmIntegerOpLatency;
+          }
+          last_visited_latency_ = kArmMemoryLoadLatency;
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
+  last_visited_latency_ = kArmMemoryLoadLatency;
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    last_visited_internal_latency_ = kArmMemoryLoadLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
+  HInstruction* index = instruction->InputAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      } else {
+        if (!has_intermediate_address) {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+        }
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        if (index->IsConstant()) {
+          last_visited_latency_ = kArmMemoryStoreLatency;
+        } else {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmMemoryStoreLatency;
+        }
+      } else {
+        // Modeling the exact instruction sequence of a runtime type check is too
+        // complicated, so we just assign it a single, conservatively slow latency.
+        last_visited_latency_ = kArmRuntimeTypeCheckLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unreachable type " << value_type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency;
+  // Users do not consume any data result, so the result latency is zero.
+  last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
+  if (imm == 0) {
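+    // The codegen emits nothing for a divisor of zero: the preceding HDivZeroCheck throws
+    // before the division would execute.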
+    last_visited_internal_latency_ = 0;
+    last_visited_latency_ = 0;
+  } else if (imm == 1 || imm == -1) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+    last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HInstruction* rhs = instruction->GetRight();
+      if (rhs->IsConstant()) {
+        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+        HandleDivRemConstantIntegralLatencies(imm);
+      } else {
+        last_visited_latency_ = kArmDivIntegerLatency;
+      }
+      break;
+    }
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmDivFloatLatency;
+      break;
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmDivDoubleLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = kArmCallInternalLatency;
+      last_visited_latency_ = kArmCallLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmCallInternalLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmCallInternalLatency;
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmLoadStringInternalLatency;
+  last_visited_latency_ = kArmMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
+  if (instruction->IsStringAlloc()) {
+    last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
+  } else {
+    last_visited_internal_latency_ = kArmCallInternalLatency;
+  }
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HInstruction* rhs = instruction->GetRight();
+      if (rhs->IsConstant()) {
+        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+        HandleDivRemConstantIntegralLatencies(imm);
+      } else {
+        last_visited_internal_latency_ = kArmDivIntegerLatency;
+        last_visited_latency_ = kArmMulIntegerLatency;
+      }
+      break;
+    }
+    default:
+      last_visited_internal_latency_ = kArmCallInternalLatency;
+      last_visited_latency_ = kArmCallLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
+                                                          const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  DCHECK(codegen_ != nullptr);
+  bool is_volatile = field_info.IsVolatile();
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+
+    case Primitive::kPrimNot:
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    case Primitive::kPrimLong:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+
+    case Primitive::kPrimDouble:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ =
+            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+  }
+
+  if (is_volatile) {
+    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
+                                                          const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+  DCHECK(codegen_ != nullptr);
+  bool is_volatile = field_info.IsVolatile();
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      if (is_volatile) {
+        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmMemoryBarrierLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
+      }
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+
+    case Primitive::kPrimLong:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ =
+            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+
+    case Primitive::kPrimDouble:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ = kArmIntegerOpLatency +
+            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  DCHECK((block->GetLoopInformation() != nullptr) ||
+         (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+  // Users do not consume any data result, so the result latency is zero.
+  last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
+  Primitive::Type result_type = instr->GetResultType();
+  Primitive::Type input_type = instr->GetInputType();
+
+  switch (result_type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
+      break;
+
+    case Primitive::kPrimInt:
+      switch (input_type) {
+        case Primitive::kPrimLong:
+          last_visited_latency_ = kArmIntegerOpLatency;  // MOV
+          break;
+        case Primitive::kPrimFloat:
+        case Primitive::kPrimDouble:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimLong:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          // MOV and extension
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+        case Primitive::kPrimFloat:
+        case Primitive::kPrimDouble:
+          // invokes runtime
+          last_visited_internal_latency_ = kArmCallInternalLatency;
+          break;
+        default:
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimLong:
+          // invokes runtime
+          last_visited_internal_latency_ = kArmCallInternalLatency;
+          break;
+        case Primitive::kPrimDouble:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimDouble:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimLong:
+          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimFloat:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
new file mode 100644
index 0000000..8d5e4f3
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+
+#include "code_generator_arm_vixl.h"
+#include "scheduler.h"
+
+namespace art {
+namespace arm {
+#ifdef ART_USE_OLD_ARM_BACKEND
+typedef CodeGeneratorARM CodeGeneratorARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+#endif
+
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and on automatic
+// tuning using a differential-evolution approach over various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
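+
+// As an example of how these values combine: a long HAdd (an ADDS+ADC pair with a
+// carry-flag bubble) is modeled as an internal latency of
+// kArmIntegerOpLatency + kArmNopLatency = 4 cycles plus a result latency of
+// kArmIntegerOpLatency = 2 cycles (see HandleBinaryOperationLatencies in scheduler_arm.cc).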
+
+class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+ public:
+  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+
+  // Default visitor for instructions not handled specifically below.
+  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \
+  M(ArrayGet         , unused)                   \
+  M(ArrayLength      , unused)                   \
+  M(ArraySet         , unused)                   \
+  M(Add              , unused)                   \
+  M(Sub              , unused)                   \
+  M(And              , unused)                   \
+  M(Or               , unused)                   \
+  M(Ror              , unused)                   \
+  M(Xor              , unused)                   \
+  M(Shl              , unused)                   \
+  M(Shr              , unused)                   \
+  M(UShr             , unused)                   \
+  M(Mul              , unused)                   \
+  M(Div              , unused)                   \
+  M(Condition        , unused)                   \
+  M(Compare          , unused)                   \
+  M(BoundsCheck      , unused)                   \
+  M(InstanceFieldGet , unused)                   \
+  M(InstanceFieldSet , unused)                   \
+  M(InstanceOf       , unused)                   \
+  M(Invoke           , unused)                   \
+  M(LoadString       , unused)                   \
+  M(NewArray         , unused)                   \
+  M(NewInstance      , unused)                   \
+  M(Rem              , unused)                   \
+  M(StaticFieldGet   , unused)                   \
+  M(StaticFieldSet   , unused)                   \
+  M(SuspendCheck     , unused)                   \
+  M(TypeConversion   , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+  M(BitwiseNegatedRight, unused)                 \
+  M(MultiplyAccumulate, unused)                  \
+  M(IntermediateAddress, unused)                 \
+  M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
+  void Visit##type(H##type* instruction) OVERRIDE;
+
+  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleBinaryOperationLatencies(HBinaryOperation* instr);
+  void HandleBitwiseOperationLatencies(HBinaryOperation* instr);
+  void HandleShiftLatencies(HBinaryOperation* instr);
+  void HandleDivRemConstantIntegralLatencies(int32_t imm);
+  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleGenerateDataProcInstruction(bool internal_latency = false);
+  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+  // The latency of an HInstruction depends on the code the CodeGenerator may emit for it,
+  // so latency visitors may query the CodeGenerator for that information in order to
+  // compute accurate latencies.
+  CodeGeneratorARMType* codegen_;
+};
+
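+// HSchedulerARM plugs the ARM latency visitor into the generic HScheduler; see
+// HInstructionScheduling::Run() in scheduler.cc for how the two are wired together.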
+class HSchedulerARM : public HScheduler {
+ public:
+  HSchedulerARM(ArenaAllocator* arena,
+                SchedulingNodeSelector* selector,
+                SchedulingLatencyVisitorARM* arm_latency_visitor)
+      : HScheduler(arena, arm_latency_visitor, selector) {}
+  ~HSchedulerARM() OVERRIDE {}
+
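+  // All shared and ARM-specific instructions enumerated in the macros above are
+  // schedulable; anything else falls back to the conservative default in
+  // HScheduler::IsSchedulable().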
+  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+  HInstruction::InstructionKind::k##type:
+    switch (instruction->GetKind()) {
+      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+        return true;
+      FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
+        return true;
+      default:
+        return HScheduler::IsSchedulable(instruction);
+    }
+#undef CASE_INSTRUCTION_KIND
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 31d13e2..84fadb4 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -28,6 +28,10 @@
 #include "scheduler_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
 namespace art {
 
 // Return all combinations of ISA and code generator that are executable on
@@ -65,133 +69,151 @@
   return v;
 }
 
-class SchedulerTest : public CommonCompilerTest {};
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-TEST_F(SchedulerTest, DependencyGraph) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->AddBlock(block1);
-  graph->SetEntryBlock(entry);
-
-  // entry:
-  // array         ParameterValue
-  // c1            IntConstant
-  // c2            IntConstant
-  // block1:
-  // add1          Add [c1, c2]
-  // add2          Add [add1, c2]
-  // mul           Mul [add1, add2]
-  // div_check     DivZeroCheck [add2] (env: add2, mul)
-  // div           Div [add1, div_check]
-  // array_get1    ArrayGet [array, add1]
-  // array_set1    ArraySet [array, add1, add2]
-  // array_get2    ArrayGet [array, add1]
-  // array_set2    ArraySet [array, add1, add2]
-
-  HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                         dex::TypeIndex(0),
-                                                         0,
-                                                         Primitive::kPrimNot);
-  HInstruction* c1 = graph->GetIntConstant(1);
-  HInstruction* c2 = graph->GetIntConstant(10);
-  HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
-  HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
-  HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
-  HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
-  HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
-  HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
-  HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-  HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
-  HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-
-  DCHECK(div_check->CanThrow());
-
-  entry->AddInstruction(array);
-
-  HInstruction* block_instructions[] = {add1,
-                                        add2,
-                                        mul,
-                                        div_check,
-                                        div,
-                                        array_get1,
-                                        array_set1,
-                                        array_get2,
-                                        array_set2};
-  for (auto instr : block_instructions) {
-    block1->AddInstruction(instr);
+class SchedulerTest : public CommonCompilerTest {
+ public:
+  SchedulerTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
   }
 
-  HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
-                                                            2,
-                                                            graph->GetArtMethod(),
+  // Build the scheduling graph and run target-specific scheduling on it.
+  void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) {
+    HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+    HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry);
+    graph_->AddBlock(block1);
+    graph_->SetEntryBlock(entry);
+
+    // entry:
+    // array         ParameterValue
+    // c1            IntConstant
+    // c2            IntConstant
+    // block1:
+    // add1          Add [c1, c2]
+    // add2          Add [add1, c2]
+    // mul           Mul [add1, add2]
+    // div_check     DivZeroCheck [add2] (env: add2, mul)
+    // div           Div [add1, div_check]
+    // array_get1    ArrayGet [array, add1]
+    // array_set1    ArraySet [array, add1, add2]
+    // array_get2    ArrayGet [array, add1]
+    // array_set2    ArraySet [array, add1, add2]
+
+    HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                            dex::TypeIndex(0),
                                                             0,
-                                                            div_check);
-  div_check->SetRawEnvironment(environment);
-  environment->SetRawEnvAt(0, add2);
-  add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
-  environment->SetRawEnvAt(1, mul);
-  mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+                                                            Primitive::kPrimNot);
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c2 = graph_->GetIntConstant(10);
+    HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2);
+    HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2);
+    HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2);
+    HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0);
+    HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+    HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+    HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+    HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+    HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
 
-  ArenaAllocator* arena = graph->GetArena();
-  CriticalPathSchedulingNodeSelector critical_path_selector;
-  arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
-  SchedulingGraph scheduling_graph(&scheduler, arena);
-  // Instructions must be inserted in reverse order into the scheduling graph.
-  for (auto instr : ReverseRange(block_instructions)) {
-    scheduling_graph.AddNode(instr);
+    DCHECK(div_check->CanThrow());
+
+    entry->AddInstruction(array);
+
+    HInstruction* block_instructions[] = {add1,
+                                          add2,
+                                          mul,
+                                          div_check,
+                                          div,
+                                          array_get1,
+                                          array_set1,
+                                          array_get2,
+                                          array_set2};
+    for (auto instr : block_instructions) {
+      block1->AddInstruction(instr);
+    }
+
+    HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_,
+                                                               2,
+                                                               graph_->GetArtMethod(),
+                                                               0,
+                                                               div_check);
+    div_check->SetRawEnvironment(environment);
+    environment->SetRawEnvAt(0, add2);
+    add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+    environment->SetRawEnvAt(1, mul);
+    mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+    SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+    // Instructions must be inserted in reverse order into the scheduling graph.
+    for (auto instr : ReverseRange(block_instructions)) {
+      scheduling_graph.AddNode(instr);
+    }
+
+    // There should be no dependencies across basic blocks.
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+    // Define-use dependency.
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+    // Read and write dependencies
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+    // Env dependency.
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+    ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+    // CanThrow.
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+
+    // Exercise the code paths of the target-specific scheduler and its SchedulingLatencyVisitor.
+    scheduler->Schedule(graph_);
   }
 
-  // Should not have dependencies cross basic blocks.
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+  void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+    for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+      HGraph* graph = CreateCFG(&allocator_, data);
 
-  // Define-use dependency.
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+      // Schedule the graph randomly.
+      HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+      scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
 
-  // Read and write dependencies
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+      RunCode(target_config,
+              graph,
+              [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+              has_result, expected);
+    }
+  }
 
-  // Env dependency.
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
-  ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
 
-  // CanThrow.
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+#if defined(ART_ENABLE_CODEGEN_arm64)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+  TestBuildDependencyGraphAndSchedule(&scheduler);
 }
 #endif
 
-static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
-                                             bool has_result,
-                                             int expected) {
-  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
-    ArenaPool pool;
-    ArenaAllocator arena(&pool);
-    HGraph* graph = CreateCFG(&arena, data);
-
-    // Schedule the graph randomly.
-    HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
-    scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
-
-    RunCode(target_config,
-            graph,
-            [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
-            has_result, expected);
-  }
+#if defined(ART_ENABLE_CODEGEN_arm)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+  arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+  TestBuildDependencyGraphAndSchedule(&scheduler);
 }
+#endif
 
 TEST_F(SchedulerTest, RandomScheduling) {
   //