Merge "Clarify --boot-image help message"
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c3979f3..ca71c32 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,9 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
+  // The number of entries in a packed switch before we use a jump table or specialized
+  // compare/jump series.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ac6b5e8..0a26786 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -6106,7 +6106,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6122,12 +6122,30 @@
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
+    // the immediate, because IP is used as the destination register. For the other
+    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+    // and they can be encoded in the instruction without making use of IP register.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      GenerateCompareWithImmediate(value_reg, lower_bound + i);
-      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      GenerateCompareWithImmediate(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 04acd9d..227f4be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. A jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -546,7 +546,7 @@
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -4558,20 +4558,29 @@
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries < kPackedSwitchJumpTableThreshold ||
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      int32_t case_value = lower_bound + i;
-      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-      if (case_value == 0) {
-        __ Cbz(value_reg, succ);
-      } else {
-        __ Cmp(value_reg, Operand(case_value));
-        __ B(eq, succ);
-      }
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 9dc9167..f872bfe 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4248,19 +4248,31 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to the default block if the index is negative.
+  // Note: We don't check the case that index is positive while value < lower_bound, because in
+  // this case, index >= num_entries must be true. This saves one branch instruction.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
-  // Insert the default branch for every other value.
+  // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bc5eb31..78f5644 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3975,17 +3975,34 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to the default block if the index is negative.
+  // Note: We don't check the case that index is positive while value < lower_bound, because in
+  // this case, index >= num_entries must be true. This saves one branch instruction.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2fb87d3..19f03df 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
-
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -6426,31 +6425,67 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+                                                              int32_t lower_bound,
+                                                              uint32_t num_entries,
+                                                              HBasicBlock* switch_block,
+                                                              HBasicBlock* default_block) {
+  // Figure out the correct compare values and jump conditions.
+  // Handle the first compare/branch as a special case because it might
+  // jump to the default case.
+  DCHECK_GT(num_entries, 2u);
+  Condition first_condition;
+  uint32_t index;
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  if (lower_bound != 0) {
+    first_condition = kLess;
+    __ cmpl(value_reg, Immediate(lower_bound));
+    __ j(first_condition, codegen_->GetLabelOf(default_block));
+    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+    index = 1;
+  } else {
+    // Handle all the compare/jumps below.
+    first_condition = kBelow;
+    index = 0;
+  }
+
+  // Handle the rest of the compare/jumps.
+  for (; index + 1 < num_entries; index += 2) {
+    int32_t compare_to_value = lower_bound + index + 1;
+    __ cmpl(value_reg, Immediate(compare_to_value));
+    // Jump to successors[index] if value < case_value[index].
+    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+    // Jump to successors[index + 1] if value == case_value[index + 1].
+    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+  }
+
+  if (index != num_entries) {
+    // There are an odd number of entries. Handle the last one.
+    DCHECK_EQ(index + 1, num_entries);
+    __ cmpl(value_reg, Immediate(lower_bound + index));
+    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+  GenPackedSwitchWithCompares(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_instr->GetBlock(),
+                              switch_instr->GetDefaultBlock());
+}
+
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6465,11 +6500,20 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_instr->GetBlock(),
+                                default_block);
+    return;
+  }
+
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6481,7 +6525,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1);
+  DCHECK_GE(num_entries, 1u);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 064051c..f9403a6 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -195,6 +195,11 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
+  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+  // table version generates 7 instructions and num_entries literals. The compare/jump sequence
+  // generates less code/data with a small num_entries.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -236,6 +241,11 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4618be9..44a51ea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. The compare/jump sequence
+// generates less code/data with a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -6021,11 +6025,58 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Should we generate smaller inline compare/jumps?
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    // Figure out the correct compare values and jump conditions.
+    // Handle the first compare/branch as a special case because it might
+    // jump to the default case.
+    DCHECK_GT(num_entries, 2u);
+    Condition first_condition;
+    uint32_t index;
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    if (lower_bound != 0) {
+      first_condition = kLess;
+      __ cmpl(value_reg_in, Immediate(lower_bound));
+      __ j(first_condition, codegen_->GetLabelOf(default_block));
+      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+      index = 1;
+    } else {
+      // Handle all the compare/jumps below.
+      first_condition = kBelow;
+      index = 0;
+    }
+
+    // Handle the rest of the compare/jumps.
+    for (; index + 1 < num_entries; index += 2) {
+      int32_t compare_to_value = lower_bound + index + 1;
+      __ cmpl(value_reg_in, Immediate(compare_to_value));
+      // Jump to successors[index] if value < case_value[index].
+      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+      // Jump to successors[index + 1] if value == case_value[index + 1].
+      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+    }
+
+    if (index != num_entries) {
+      // There are an odd number of entries. Handle the last one.
+      DCHECK_EQ(index + 1, num_entries);
+      __ cmpl(value_reg_in, Immediate(lower_bound + index));
+      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
+    }
+    return;
+  }
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -6036,7 +6087,6 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b383f1e..a385448 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -79,6 +80,10 @@
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
     InitializePCRelativeBasePointer();
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 571a2f5..74cc899 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -27,6 +27,7 @@
   base/arena_allocator.cc \
   base/arena_bit_vector.cc \
   base/bit_vector.cc \
+  base/file_magic.cc \
   base/hex_dump.cc \
   base/logging.cc \
   base/mutex.cc \
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
new file mode 100644
index 0000000..9756338
--- /dev/null
+++ b/runtime/base/file_magic.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_magic.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+#include "dex_file.h"
+#include "stringprintf.h"
+
+namespace art {
+
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+  CHECK(magic != nullptr);
+  ScopedFd fd(open(filename, O_RDONLY, 0));
+  if (fd.get() == -1) {
+    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
+    return ScopedFd();
+  }
+  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
+  if (n != sizeof(*magic)) {
+    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
+    return ScopedFd();
+  }
+  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
+    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
+                              strerror(errno));
+    return ScopedFd();
+  }
+  return fd;
+}
+
+bool IsZipMagic(uint32_t magic) {
+  return (('P' == ((magic >> 0) & 0xff)) &&
+          ('K' == ((magic >> 8) & 0xff)));
+}
+
+bool IsDexMagic(uint32_t magic) {
+  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
+}
+
+}  // namespace art
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
new file mode 100644
index 0000000..f7e4bad
--- /dev/null
+++ b/runtime/base/file_magic.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_FILE_MAGIC_H_
+#define ART_RUNTIME_BASE_FILE_MAGIC_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "ScopedFd.h"
+
+namespace art {
+
+// Open a file and read its magic number.
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+
+// Check whether the given magic matches a known file type.
+bool IsZipMagic(uint32_t magic);
+bool IsDexMagic(uint32_t magic);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_FILE_MAGIC_H_
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 07cadc4..78bc3d5 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -17,12 +17,22 @@
 #include "base/unix_file/fd_file.h"
 
 #include <errno.h>
+#include <limits>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "base/logging.h"
 
+// Includes needed for FdFile::Copy().
+#ifdef __linux__
+#include <sys/sendfile.h>
+#else
+#include <algorithm>
+#include "base/stl_util.h"
+#include "globals.h"
+#endif
+
 namespace unix_file {
 
 FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
@@ -222,6 +232,52 @@
   return true;
 }
 
+bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
+  off_t off = static_cast<off_t>(offset);
+  off_t sz = static_cast<off_t>(size);
+  if (offset < 0 || static_cast<int64_t>(off) != offset ||
+      size < 0 || static_cast<int64_t>(sz) != size ||
+      sz > std::numeric_limits<off_t>::max() - off) {
+    errno = EINVAL;
+    return false;
+  }
+  if (size == 0) {
+    return true;
+  }
+#ifdef __linux__
+  // Use sendfile(), available for files since linux kernel 2.6.33.
+  off_t end = off + sz;
+  while (off != end) {
+    int result = TEMP_FAILURE_RETRY(
+        sendfile(Fd(), input_file->Fd(), &off, end - off));
+    if (result == -1) {
+      return false;
+    }
+    // Ignore the number of bytes in `result`, sendfile() already updated `off`.
+  }
+#else
+  if (lseek(input_file->Fd(), off, SEEK_SET) != off) {
+    return false;
+  }
+  constexpr size_t kMaxBufferSize = 4 * ::art::kPageSize;
+  const size_t buffer_size = std::min<uint64_t>(size, kMaxBufferSize);
+  art::UniqueCPtr<void> buffer(malloc(buffer_size));
+  if (buffer == nullptr) {
+    errno = ENOMEM;
+    return false;
+  }
+  while (size != 0) {
+    size_t chunk_size = std::min<uint64_t>(buffer_size, size);
+    if (!input_file->ReadFully(buffer.get(), chunk_size) ||
+        !WriteFully(buffer.get(), chunk_size)) {
+      return false;
+    }
+    size -= chunk_size;
+  }
+#endif
+  return true;
+}
+
 void FdFile::Erase() {
   TEMP_FAILURE_RETRY(SetLength(0));
   TEMP_FAILURE_RETRY(Flush());
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index f47368b..231a1ab 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -50,12 +50,12 @@
   bool Open(const std::string& file_path, int flags, mode_t mode);
 
   // RandomAccessFile API.
-  virtual int Close() WARN_UNUSED;
-  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const WARN_UNUSED;
-  virtual int SetLength(int64_t new_length) WARN_UNUSED;
-  virtual int64_t GetLength() const;
-  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset) WARN_UNUSED;
-  virtual int Flush() WARN_UNUSED;
+  int Close() OVERRIDE WARN_UNUSED;
+  int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
+  int SetLength(int64_t new_length) OVERRIDE WARN_UNUSED;
+  int64_t GetLength() const OVERRIDE;
+  int64_t Write(const char* buf, int64_t byte_count, int64_t offset) OVERRIDE WARN_UNUSED;
+  int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
   void Erase();
@@ -77,6 +77,9 @@
   bool PreadFully(void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
   bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
 
+  // Copy data from another file.
+  bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
     kBase,           // Base, file has not been flushed or closed.
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 388f717..ecf607c 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -110,4 +110,34 @@
   ASSERT_EQ(file.Close(), 0);
 }
 
+TEST_F(FdFileTest, Copy) {
+  art::ScratchFile src_tmp;
+  FdFile src;
+  ASSERT_TRUE(src.Open(src_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(src.Fd(), 0);
+  ASSERT_TRUE(src.IsOpened());
+
+  char src_data[] = "Some test data.";
+  ASSERT_TRUE(src.WriteFully(src_data, sizeof(src_data)));  // Including the zero terminator.
+  ASSERT_EQ(0, src.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
+
+  art::ScratchFile dest_tmp;
+  FdFile dest;
+  ASSERT_TRUE(dest.Open(src_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(dest.Fd(), 0);
+  ASSERT_TRUE(dest.IsOpened());
+
+  ASSERT_TRUE(dest.Copy(&src, 0, sizeof(src_data)));
+  ASSERT_EQ(0, dest.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), dest.GetLength());
+
+  char check_data[sizeof(src_data)];
+  ASSERT_TRUE(dest.PreadFully(check_data, sizeof(src_data), 0u));
+  CHECK_EQ(0, memcmp(check_data, src_data, sizeof(src_data)));
+
+  ASSERT_EQ(0, dest.Close());
+  ASSERT_EQ(0, src.Close());
+}
+
 }  // namespace unix_file
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 4163e2e..30d921a 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,6 +29,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/file_magic.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -62,26 +63,6 @@
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
 const uint8_t DexFile::kDexMagicVersion[] = { '0', '3', '5', '\0' };
 
-static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
-  CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return -1;
-  }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
-  if (n != sizeof(*magic)) {
-    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return -1;
-  }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
-    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
-                              strerror(errno));
-    return -1;
-  }
-  return fd.release();
-}
-
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != nullptr);
   uint32_t magic;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 68db7e3..eddc3a4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1392,21 +1392,6 @@
   return filename;
 }
 
-bool IsZipMagic(uint32_t magic) {
-  return (('P' == ((magic >> 0) & 0xff)) &&
-          ('K' == ((magic >> 8) & 0xff)));
-}
-
-bool IsDexMagic(uint32_t magic) {
-  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
-}
-
-bool IsOatMagic(uint32_t magic) {
-  return (memcmp(reinterpret_cast<const uint8_t*>(magic),
-                 OatHeader::kOatMagic,
-                 sizeof(OatHeader::kOatMagic)) == 0);
-}
-
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
   const std::string command_line(Join(arg_vector, ' '));
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 8b7941a..5b9e963 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -273,11 +273,6 @@
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
 
-// Check whether the given magic matches a known file type.
-bool IsZipMagic(uint32_t magic);
-bool IsDexMagic(uint32_t magic);
-bool IsOatMagic(uint32_t magic);
-
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);