Merge "ARM: VIXL32: Improve codegen on CBZ/CBNZ"
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b8d9a91..7aea616 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1152,7 +1152,8 @@
 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
                                                             size_t condition_input_index,
                                                             vixl32::Label* true_target,
-                                                            vixl32::Label* false_target) {
+                                                            vixl32::Label* false_target,
+                                                            bool far_target) {
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -1188,9 +1189,13 @@
       DCHECK(cond_val.IsRegister());
     }
     if (true_target == nullptr) {
-      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+      __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
+                                false_target,
+                                far_target);
     } else {
-      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+      __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
+                                   true_target,
+                                   far_target);
     }
   } else {
     // Condition has not been materialized. Use its inputs as the comparison and
@@ -1285,7 +1290,8 @@
   GenerateTestAndBranch(select,
                         /* condition_input_index */ 2,
                         /* true_target */ nullptr,
-                        &false_target);
+                        &false_target,
+                        /* far_target */ false);
   codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
   __ Bind(&false_target);
 }
@@ -2775,7 +2781,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
-        __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
       } else {
         DCHECK(value.IsConstant()) << value;
         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
@@ -3441,7 +3447,7 @@
   __ Ldrexd(temp1, temp2, addr);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   __ Strexd(temp1, value_lo, value_hi, addr);
-  __ Cbnz(temp1, &fail);
+  __ CompareAndBranchIfNonZero(temp1, &fail);
 }
 
 void LocationsBuilderARMVIXL::HandleFieldSet(
@@ -3957,7 +3963,7 @@
   NullCheckSlowPathARMVIXL* slow_path =
       new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction);
   AddSlowPath(slow_path);
-  __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+  __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
@@ -4413,7 +4419,7 @@
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
           vixl32::Label non_zero;
-          __ Cbnz(value, &non_zero);
+          __ CompareAndBranchIfNonZero(value, &non_zero);
           if (index.IsConstant()) {
             size_t offset =
                (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4461,7 +4467,7 @@
           GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
           // If heap poisoning is enabled, no need to unpoison
           // `temp1`, as we are comparing against null below.
-          __ Cbnz(temp1, slow_path->GetEntryLabel());
+          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
           __ Bind(&do_put);
         } else {
           __ B(ne, slow_path->GetEntryLabel());
@@ -4638,7 +4644,7 @@
                                       bool can_be_null) {
   vixl32::Label is_null;
   if (can_be_null) {
-    __ Cbz(value, &is_null);
+    __ CompareAndBranchIfZero(value, &is_null);
   }
   GetAssembler()->LoadFromOffset(
       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
@@ -4697,10 +4703,10 @@
   GetAssembler()->LoadFromOffset(
       kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
   if (successor == nullptr) {
-    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
   } else {
-    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor));
     __ B(slow_path->GetEntryLabel());
   }
 }
@@ -5015,7 +5021,7 @@
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
     if (generate_null_check) {
-      __ Cbz(out, slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
     }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
@@ -5206,7 +5212,7 @@
   // Return 0 if `obj` is null.
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ Cbz(obj, &zero);
+    __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false);
   }
 
   switch (type_check_kind) {
@@ -5239,7 +5245,7 @@
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
-      __ Cbz(out, &done);
+      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       __ Cmp(out, cls);
       __ B(ne, &loop);
       __ Mov(out, 1);
@@ -5263,7 +5269,7 @@
       __ B(eq, &success);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
-      __ Cbnz(out, &loop);
+      __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
       __ Bind(&success);
@@ -5289,10 +5295,10 @@
       // /* HeapReference<Class> */ out = out->component_type_
       GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
-      __ Cbz(out, &done);
+      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(out, &zero);
+      __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
       __ Bind(&exact_check);
       __ Mov(out, 1);
       __ B(&done);
@@ -5428,7 +5434,7 @@
   vixl32::Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ Cbz(obj, &done);
+    __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
@@ -5454,7 +5460,7 @@
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
-      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
 
       // Otherwise, compare the classes.
       __ Cmp(temp, cls);
@@ -5474,7 +5480,7 @@
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
-      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
       // Otherwise, jump to the beginning of the loop.
       __ B(&loop);
       break;
@@ -5489,12 +5495,12 @@
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
       // If the component type is null, jump to the slow path to throw the exception.
-      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
       // Otherwise, the object is indeed an array; jump to label `check_non_primitive_component_type`
       // to further check that this component type is not a primitive type.
       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
-      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
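The hunks above follow a single call-site rule, sketched below (illustrative placeholders: reg, out, near, slow_path; not lines from the patch): a label bound a few instructions after the branch is within CBZ/CBNZ reach, so the hint may be false, while a slow-path entry label may end up arbitrarily far away, so such sites keep the conservative default.

  vixl32::Label near;
  __ CompareAndBranchIfZero(reg, &near, /* far_target */ false);  // bound shortly after: may emit CBZ
  __ Mov(out, 1);                                                 // short fall-through sequence
  __ Bind(&near);
  __ CompareAndBranchIfZero(reg, slow_path->GetEntryLabel());     // distance unknown: CMP + B.eq
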
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 302ee38..38c756f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -369,7 +369,7 @@
   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
 
   ArmVIXLAssembler* GetAssembler() const { return assembler_; }
-  vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
   void VisitUnimplemementedInstruction(HInstruction* instruction) {
@@ -451,7 +451,8 @@
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              vixl::aarch32::Label* true_target,
-                             vixl::aarch32::Label* false_target);
+                             vixl::aarch32::Label* false_target,
+                             bool far_target = true);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
                                     vixl::aarch32::Label* false_target);
@@ -504,7 +505,7 @@
 
   const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
-  vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
   size_t GetWordSize() const OVERRIDE { return kArmWordSize; }
 
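Note the default of far_target = true: CMP plus a conditional branch is correct at any branch distance, so existing call sites keep their previous behavior, and individual sites opt into the shorter CBZ/CBNZ form only where the target label is provably near (as in VisitSelect above).
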
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 110430f..7a1ec9f 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -303,7 +303,7 @@
     vixl32::Register in_reg_hi = HighRegisterFrom(in);
     vixl32::Label end;
     __ Clz(out, in_reg_hi);
-    __ Cbnz(in_reg_hi, &end);
+    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
     __ Clz(out, in_reg_lo);
     __ Add(out, out, 32);
     __ Bind(&end);
@@ -345,7 +345,7 @@
     vixl32::Label end;
     __ Rbit(out, in_reg_lo);
     __ Clz(out, out);
-    __ Cbnz(in_reg_lo, &end);
+    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
     __ Rbit(out, in_reg_hi);
     __ Clz(out, out);
     __ Add(out, out, 32);
@@ -1158,7 +1158,7 @@
   if (can_slow_path) {
     slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
     codegen_->AddSlowPath(slow_path);
-    __ Cbz(arg, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
   }
 
   // Reference equality check, return 0 if same reference.
@@ -1191,7 +1191,9 @@
   }
 
   // Shorter string is empty?
-  __ Cbz(temp0, &end);
+  // Note that mirror::kUseStringCompression == true introduces many extra instructions,
+  // which can place the `end` label too far from this branch to be CBZ-encodable.
+  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
 
   if (mirror::kUseStringCompression) {
     // Check if both strings using same compression style to use this comparison loop.
@@ -1414,7 +1416,7 @@
   StringEqualsOptimizations optimizations(invoke);
   if (!optimizations.GetArgumentNotNull()) {
     // Check if input is null, return false if it is.
-    __ Cbz(arg, &return_false);
+    __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
   }
 
   // Reference equality check, return true if same reference.
@@ -1442,7 +1444,7 @@
   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                 "Expecting 0=compressed, 1=uncompressed");
-  __ Cbz(temp, &return_true);
+  __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
 
   // Assertions that must hold in order to compare strings 4 bytes at a time.
   DCHECK_ALIGNED(value_offset, 4);
@@ -1718,7 +1720,7 @@
   } else if (length_is_input_length) {
     // The only way the copy can succeed is if pos is zero.
     vixl32::Register pos_reg = RegisterFrom(pos);
-    __ Cbnz(pos_reg, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     vixl32::Register pos_reg = RegisterFrom(pos);
@@ -1815,12 +1817,12 @@
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
-    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
-    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
   }
 
   // If the length is negative, bail out.
@@ -1865,13 +1867,13 @@
         // /* HeapReference<Class> */ temp1 = temp1->component_type_
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
-        __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
         // If heap poisoning is enabled, `temp1` has been unpoisoned
         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
       }
 
       // /* HeapReference<Class> */ temp1 = dest->klass_
@@ -1889,13 +1891,13 @@
         // /* HeapReference<Class> */ temp2 = temp1->component_type_
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
-        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
         // If heap poisoning is enabled, `temp2` has been unpoisoned
         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
       }
 
       // For the same reason given earlier, `temp1` is not trashed by the
@@ -1918,7 +1920,7 @@
         // comparison with null below, and this reference is not
         // kept afterwards.
         __ Ldr(temp1, MemOperand(temp1, super_offset));
-        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
         __ Bind(&do_copy);
       } else {
         __ B(ne, intrinsic_slow_path->GetEntryLabel());
@@ -1944,24 +1946,24 @@
         // Bail out if the destination is not a non primitive array.
         // /* HeapReference<Class> */ temp3 = temp1->component_type_
         __ Ldr(temp3, MemOperand(temp1, component_offset));
-        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
         assembler->MaybeUnpoisonHeapReference(temp3);
         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
       }
 
       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
         // Bail out if the source is not a non primitive array.
         // /* HeapReference<Class> */ temp3 = temp2->component_type_
         __ Ldr(temp3, MemOperand(temp2, component_offset));
-        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
         assembler->MaybeUnpoisonHeapReference(temp3);
         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
       }
 
       __ Cmp(temp1, temp2);
@@ -1978,7 +1980,7 @@
         // /* HeapReference<Class> */ temp1 = temp1->super_class_
         __ Ldr(temp1, MemOperand(temp1, super_offset));
         // No need to unpoison the result, we're comparing against null.
-        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
         __ Bind(&do_copy);
       } else {
         __ B(ne, intrinsic_slow_path->GetEntryLabel());
@@ -1994,7 +1996,7 @@
       // /* HeapReference<Class> */ temp3 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
-      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `temp3` has been unpoisoned
       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
     } else {
@@ -2003,13 +2005,13 @@
       assembler->MaybeUnpoisonHeapReference(temp1);
       // /* HeapReference<Class> */ temp3 = temp1->component_type_
       __ Ldr(temp3, MemOperand(temp1, component_offset));
-      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
       assembler->MaybeUnpoisonHeapReference(temp3);
     }
     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-    __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }
 
   int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index e3b9fb6..1aaa231 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -429,5 +429,31 @@
   }
 }
 
+void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
+                                                   vixl32::Label* label,
+                                                   bool is_far_target) {
+  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
+    // In T32, Cbz/Cbnz instructions have the following limitations:
+    // - The branch target offset is encoded in only 7 bits (i:imm5:'0'), so far targets are out of reach.
+    // - Only low registers (i.e. R0-R7) can be encoded.
+    // - Only forward branches (unbound labels) are supported.
+    Cbz(rn, label);
+    return;
+  }
+  Cmp(rn, 0);
+  B(eq, label);
+}
+
+void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
+                                                      vixl32::Label* label,
+                                                      bool is_far_target) {
+  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
+    Cbnz(rn, label);
+    return;
+  }
+  Cmp(rn, 0);
+  B(ne, label);
+}
+
 }  // namespace arm
 }  // namespace art
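As a cross-check of the reach limit quoted in the comments, a sketch of the T32 offset arithmetic (kMaxCbzOffset is an illustrative name, not part of the patch):

  // i (1 bit) and imm5 (5 bits) give 6 magnitude bits; the implicit trailing
  // '0' doubles the value, so forward offsets span 0..126 bytes.
  constexpr int32_t kMaxCbzOffset = ((1 << 6) - 1) << 1;
  static_assert(kMaxCbzOffset == 126, "CBZ/CBNZ can reach at most 126 bytes forward");
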
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index e020628..b4a4abc 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -37,6 +37,25 @@
 namespace art {
 namespace arm {
 
+class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler {
+ public:
+  // The following interfaces can generate CMP+Bcc or Cbz/Cbnz.
+  // CMP+Bcc is generated by default.
+  // If the hint is given (is_far_target = false) and both rn and label fit into Cbz/Cbnz,
+  // then Cbz/Cbnz is generated.
+  // Prefer these interfaces over using vixl32::MacroAssembler::Cbz/Cbnz directly.
+  // In T32, Cbz/Cbnz instructions have the following limitations:
+  // - Far targets, which are over 126 bytes away, are not supported.
+  // - Only low registers can be encoded.
+  // - Backward branches are not supported.
+  void CompareAndBranchIfZero(vixl32::Register rn,
+                              vixl32::Label* label,
+                              bool is_far_target = true);
+  void CompareAndBranchIfNonZero(vixl32::Register rn,
+                                 vixl32::Label* label,
+                                 bool is_far_target = true);
+};
+
 class ArmVIXLAssembler FINAL : public Assembler {
  private:
   class ArmException;
@@ -48,7 +67,7 @@
   }
 
   virtual ~ArmVIXLAssembler() {}
-  vixl32::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
   void FinalizeCode() OVERRIDE;
 
   // Size of generated code.
@@ -117,7 +136,7 @@
 
  private:
   // VIXL assembler.
-  vixl32::MacroAssembler vixl_masm_;
+  ArmVIXLMacroAssembler vixl_masm_;
 };
 
 // Thread register declaration.
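Since GetVIXLAssembler() now returns the ArmVIXLMacroAssembler subclass, the helpers are reachable from every site that previously held a plain vixl32::MacroAssembler*. A minimal usage sketch (masm and temp are placeholder names, not from the patch):

  ArmVIXLMacroAssembler* masm = GetAssembler()->GetVIXLAssembler();
  vixl32::Label fail;
  masm->CompareAndBranchIfNonZero(temp, &fail);  // default hint: emits CMP temp, #0; B.ne fail
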
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 23b2774..b2bbd72 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -574,10 +574,12 @@
 
   switch (condition) {
     case JNIMacroUnaryCondition::kZero:
-      ___ Cbz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfZero(test.AsArm().AsVIXLRegister(),
+                                 ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ Cbnz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfNonZero(test.AsArm().AsVIXLRegister(),
+                                    ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     default:
       LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
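
Both JNI call sites keep the default far-target hint, so they fall back to CMP plus a conditional branch, which is correct at any distance; the proximity of a JNIMacroLabel target is presumably not known at this level.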