Some optimizations for the array alloc path.

- Force Array::Alloc() to be inlined.
- Simplify the array size overflow check.
- Turn fill_usable into a template parameter.
- Remove a branch in Array::DataOffset() and avoid
  Primitive::ComponentSize(), which has a switch, in the array alloc
  path.
- Strength reductions in the array size computation by using component
  size shifts instead of component sizes. Store component size shift
  in the upper 16 bits of primitive_type field.
- Speedup: ~4% (3435->3284) in MemAllocTest on N4.

Bug: 9986565

Change-Id: I4b142ffac4ab8b5b915836f1660a949d6442344c
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f927720..9090f29 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -314,6 +314,7 @@
     java_lang_Class->AssertReadBarrierPointer();
   }
   java_lang_Class->SetClassSize(mirror::Class::ClassClassSize());
+  java_lang_Class->SetPrimitiveType(Primitive::kPrimNot);
   heap->DecrementDisableMovingGC(self);
   // AllocClass(mirror::Class*) can now be used
 
@@ -338,6 +339,12 @@
   // Setup the char (primitive) class to be used for char[].
   Handle<mirror::Class> char_class(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(), mirror::Class::PrimitiveClassSize())));
+  // The primitive char class won't be initialized by
+  // InitializePrimitiveClass until line 459, but strings (and
+  // internal char arrays) will be allocated before that and the
+  // component size, which is computed from the primitive type, needs
+  // to be set here.
+  char_class->SetPrimitiveType(Primitive::kPrimChar);
 
   // Setup the char[] class to be used for String.
   Handle<mirror::Class> char_array_class(hs.NewHandle(
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a9c4b4a..5729ad8 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1422,7 +1422,7 @@
     return error;
   }
   *new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length,
-                                                         c->GetComponentSize(),
+                                                         c->GetComponentSizeShift(),
                                                          Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   return JDWP::ERR_NONE;
 }
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 4ef7d74..9fb9a3b 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -230,11 +230,11 @@
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
     return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                               klass->GetComponentSize(),
+                                               klass->GetComponentSizeShift(),
                                                heap->GetCurrentAllocator());
   }
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                             klass->GetComponentSize(), allocator_type);
+                                             klass->GetComponentSizeShift(), allocator_type);
 }
 
 template <bool kAccessCheck, bool kInstrumented>
@@ -259,7 +259,7 @@
   // No need to retry a slow-path allocation as the above code won't cause a GC or thread
   // suspension.
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                             klass->GetComponentSize(), allocator_type);
+                                             klass->GetComponentSizeShift(), allocator_type);
 }
 
 template<FindFieldType type, bool access_check>
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index a78c2c0..835d6e2 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -90,7 +90,8 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
   // the heap switched the allocator type while we were suspended.
-  return mirror::Array::Alloc<false>(self, klass, component_count, klass->GetComponentSize(),
+  return mirror::Array::Alloc<false>(self, klass, component_count,
+                                     klass->GetComponentSizeShift(),
                                      heap->GetCurrentAllocator());
 }
 
@@ -109,7 +110,8 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
   // the heap switched the allocator type while we were suspended.
-  return mirror::Array::Alloc<true>(self, klass, component_count, klass->GetComponentSize(),
+  return mirror::Array::Alloc<true>(self, klass, component_count,
+                                    klass->GetComponentSizeShift(),
                                     heap->GetCurrentAllocator());
 }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index cd8cce0..d2fcc64 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -36,8 +36,8 @@
     mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, &element_class);
     DCHECK(array_class != nullptr);
     gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
-    result->SetL(mirror::Array::Alloc<true>(self, array_class, length,
-                                            array_class->GetComponentSize(), allocator, true));
+    result->SetL(mirror::Array::Alloc<true, true>(self, array_class, length,
+                                                  array_class->GetComponentSizeShift(), allocator));
   } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
     result->SetL(NULL);
   } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 9f08013..0e3c2de 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -708,7 +708,8 @@
     }
     return false;
   }
-  Object* newArray = Array::Alloc<true>(self, arrayClass, length, arrayClass->GetComponentSize(),
+  Object* newArray = Array::Alloc<true>(self, arrayClass, length,
+                                        arrayClass->GetComponentSizeShift(),
                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
   if (UNLIKELY(newArray == NULL)) {
     DCHECK(self->IsExceptionPending());
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 213dbc2..6582226 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -35,13 +35,13 @@
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
-  size_t component_size =
-      GetClass<kVerifyFlags, kReadBarrierOption>()->template GetComponentSize<kReadBarrierOption>();
+  size_t component_size_shift = GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template GetComponentSizeShift<kReadBarrierOption>();
   // Don't need to check this since we already check this in GetClass.
   int32_t component_count =
       GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>();
-  size_t header_size = DataOffset(component_size).SizeValue();
-  size_t data_size = component_count * component_size;
+  size_t header_size = DataOffset(1U << component_size_shift).SizeValue();
+  size_t data_size = component_count << component_size_shift;
   return header_size + data_size;
 }
 
@@ -56,24 +56,36 @@
 }
 
 static inline size_t ComputeArraySize(Thread* self, Class* array_class, int32_t component_count,
-                                      size_t component_size)
+                                      size_t component_size_shift)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(array_class != NULL);
   DCHECK_GE(component_count, 0);
   DCHECK(array_class->IsArrayClass());
 
+  size_t component_size = 1U << component_size_shift;
   size_t header_size = Array::DataOffset(component_size).SizeValue();
-  size_t data_size = component_count * component_size;
+  size_t data_size = static_cast<size_t>(component_count) << component_size_shift;
   size_t size = header_size + data_size;
 
-  // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
-  size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size);
-  if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) {
+  // Check for size_t overflow and throw OutOfMemoryError if this was
+  // an unreasonable request.
+#ifdef __LP64__
+  // 64-bit. No overflow as component_count is 32-bit and the maximum
+  // component size is 8.
+  DCHECK_LE((1U << component_size_shift), 8U);
+#else
+  // 32-bit.
+  DCHECK_NE(header_size, 0U);
+  DCHECK_EQ(RoundUp(header_size, component_size), header_size);
+  // The array length limit (exclusive).
+  const size_t length_limit = (0U - header_size) >> component_size_shift;
+  if (UNLIKELY(length_limit <= static_cast<size_t>(component_count))) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
                                              PrettyDescriptor(array_class).c_str(),
                                              component_count).c_str());
     return 0;  // failure
   }
+#endif
   return size;
 }
 
@@ -103,8 +115,10 @@
 // array.
 class SetLengthToUsableSizeVisitor {
  public:
-  SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size, size_t component_size) :
-      minimum_length_(min_length), header_size_(header_size), component_size_(component_size) {
+  SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size,
+                               size_t component_size_shift) :
+      minimum_length_(min_length), header_size_(header_size),
+      component_size_shift_(component_size_shift) {
   }
 
   void operator()(Object* obj, size_t usable_size) const
@@ -112,10 +126,12 @@
     // Avoid AsArray as object is not yet in live bitmap or allocation stack.
     Array* array = down_cast<Array*>(obj);
     // DCHECK(array->IsArrayInstance());
-    int32_t length = (usable_size - header_size_) / component_size_;
+    int32_t length = (usable_size - header_size_) >> component_size_shift_;
     DCHECK_GE(length, minimum_length_);
-    byte* old_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, minimum_length_));
-    byte* new_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, length));
+    byte* old_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_,
+                                                              minimum_length_));
+    byte* new_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_,
+                                                              length));
     // Ensure space beyond original allocation is zeroed.
     memset(old_end, 0, new_end - old_end);
     array->SetLength(length);
@@ -124,38 +140,46 @@
  private:
   const int32_t minimum_length_;
   const size_t header_size_;
-  const size_t component_size_;
+  const size_t component_size_shift_;
 
   DISALLOW_COPY_AND_ASSIGN(SetLengthToUsableSizeVisitor);
 };
 
-template <bool kIsInstrumented>
+template <bool kIsInstrumented, bool kFillUsable>
 inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           size_t component_size, gc::AllocatorType allocator_type,
-                           bool fill_usable) {
+                           size_t component_size_shift, gc::AllocatorType allocator_type) {
   DCHECK(allocator_type != gc::kAllocatorTypeLOS);
-  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
+  DCHECK_EQ(array_class->GetComponentSizeShift(), component_size_shift);
+  DCHECK_EQ(array_class->GetComponentSize(), (1U << component_size_shift));
+  size_t size = ComputeArraySize(self, array_class, component_count, component_size_shift);
+#ifdef __LP64__
+  // 64-bit. No size_t overflow.
+  DCHECK_NE(size, 0U);
+#else
+  // 32-bit.
   if (UNLIKELY(size == 0)) {
     return nullptr;
   }
+#endif
   gc::Heap* heap = Runtime::Current()->GetHeap();
   Array* result;
-  if (!fill_usable) {
+  if (!kFillUsable) {
     SetLengthVisitor visitor(component_count);
     result = down_cast<Array*>(
         heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
                                                               allocator_type, visitor));
   } else {
-    SetLengthToUsableSizeVisitor visitor(component_count, DataOffset(component_size).SizeValue(),
-                                         component_size);
+    SetLengthToUsableSizeVisitor visitor(component_count,
+                                         DataOffset(1U << component_size_shift).SizeValue(),
+                                         component_size_shift);
     result = down_cast<Array*>(
         heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
                                                               allocator_type, visitor));
   }
   if (kIsDebugBuild && result != nullptr && Runtime::Current()->IsStarted()) {
     array_class = result->GetClass();  // In case the array class moved.
-    CHECK_EQ(array_class->GetComponentSize(), component_size);
-    if (!fill_usable) {
+    CHECK_EQ(array_class->GetComponentSize(), 1U << component_size_shift);
+    if (!kFillUsable) {
       CHECK_EQ(result->SizeOf(), size);
     } else {
       CHECK_GE(result->SizeOf(), size);
@@ -173,7 +197,8 @@
 
 template<typename T>
 inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
-  Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, sizeof(T),
+  Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length,
+                                        ComponentSizeShiftWidth<sizeof(T)>(),
                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
   return down_cast<PrimitiveArray<T>*>(raw_array);
 }
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 4535f6c..636be33 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -48,7 +48,8 @@
   StackHandleScope<1> hs(self);
   Handle<Array> new_array(
       hs.NewHandle(
-          Array::Alloc<true>(self, array_class.Get(), array_length, array_class->GetComponentSize(),
+          Array::Alloc<true>(self, array_class.Get(), array_length,
+                             array_class->GetComponentSizeShift(),
                              Runtime::Current()->GetHeap()->GetCurrentAllocator())));
   if (UNLIKELY(new_array.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 7af88d6..521d7e7 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -33,13 +33,12 @@
   // The size of a java.lang.Class representing an array.
   static uint32_t ClassSize();
 
-  // Allocates an array with the given properties, if fill_usable is true the array will be of at
+  // Allocates an array with the given properties, if kFillUsable is true the array will be of at
   // least component_count size, however, if there's usable space at the end of the allocation the
   // array will fill it.
-  template <bool kIsInstrumented>
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                      size_t component_size, gc::AllocatorType allocator_type,
-                      bool fill_usable = false)
+  template <bool kIsInstrumented, bool kFillUsable = false>
+  ALWAYS_INLINE static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                                    size_t component_size_shift, gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Handle<Class> element_class,
@@ -66,12 +65,11 @@
   }
 
   static MemberOffset DataOffset(size_t component_size) {
-    if (component_size != sizeof(int64_t)) {
-      return OFFSET_OF_OBJECT_MEMBER(Array, first_element_);
-    } else {
-      // Align longs and doubles.
-      return MemberOffset(OFFSETOF_MEMBER(Array, first_element_) + 4);
-    }
+    DCHECK(IsPowerOfTwo(component_size)) << component_size;
+    size_t data_offset = RoundUp(OFFSETOF_MEMBER(Array, first_element_), component_size);
+    DCHECK_EQ(RoundUp(data_offset, component_size), data_offset)
+        << "Array data offset isn't aligned with component size";
+    return MemberOffset(data_offset);
   }
 
   void* GetRawData(size_t component_size, int32_t index)
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 3f67468..3d3ae16 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -510,8 +510,19 @@
 template<VerifyObjectFlags kVerifyFlags>
 inline Primitive::Type Class::GetPrimitiveType() {
   DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-  return static_cast<Primitive::Type>(
-      GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_)));
+  int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
+  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
+  return type;
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline size_t Class::GetPrimitiveTypeSizeShift() {
+  DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
+  int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
+  size_t size_shift = static_cast<Primitive::Type>(v32 >> 16);
+  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
+  return size_shift;
 }
 
 inline void Class::CheckObjectAlloc() {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 4a8d6dc..0acf695 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -345,9 +345,16 @@
 
   void SetPrimitiveType(Primitive::Type new_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), new_type);
+    int32_t v32 = static_cast<int32_t>(new_type);
+    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
+    // Store the component size shift in the upper 16 bits.
+    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
+    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  size_t GetPrimitiveTypeSizeShift() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Returns true if the class is a primitive type.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsPrimitive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -457,8 +464,12 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return Primitive::ComponentSize(
-        GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveType());
+    return 1U << GetComponentSizeShift();
+  }
+
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  size_t GetComponentSizeShift() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveTypeSizeShift();
   }
 
   bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1149,8 +1160,9 @@
   // See also class_size_.
   uint32_t object_size_;
 
-  // Primitive type value, or Primitive::kPrimNot (0); set for generated primitive classes.
-  Primitive::Type primitive_type_;
+  // The lower 16 bits contains a Primitive::Type value. The upper 16
+  // bits contains the size shift of the primitive type.
+  uint32_t primitive_type_;
 
   // Bitmap of offsets of ifields.
   uint32_t reference_instance_offsets_;
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index c7540dc..0ca44f8 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -35,10 +35,13 @@
 inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
                                              int32_t length, gc::AllocatorType allocator_type) {
   Array* array = Array::Alloc<true>(self, object_array_class, length,
-                                    sizeof(HeapReference<Object>), allocator_type);
+                                    ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>(),
+                                    allocator_type);
   if (UNLIKELY(array == nullptr)) {
     return nullptr;
   } else {
+    DCHECK_EQ(array->GetClass()->GetComponentSizeShift(),
+              ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>());
     return array->AsObjectArray<T>();
   }
 }
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 1290a3d..7fa664d 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -162,19 +162,19 @@
   Class* c = class_linker_->FindSystemClass(soa.Self(), "[I");
   StackHandleScope<1> hs(soa.Self());
   MutableHandle<Array> a(
-      hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+      hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(),
                                       Runtime::Current()->GetHeap()->GetCurrentAllocator())));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
-  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(),
                               Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
-  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(),
                               Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
@@ -185,26 +185,26 @@
   Class* c = class_linker_->FindSystemClass(soa.Self(), "[B");
   StackHandleScope<1> hs(soa.Self());
   MutableHandle<Array> a(
-      hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
-                                      Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)));
+      hs.NewHandle(Array::Alloc<true, true>(soa.Self(), c, 1, c->GetComponentSizeShift(),
+                                            Runtime::Current()->GetHeap()->GetCurrentAllocator())));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[I");
-  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
-                              Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
+  a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(),
+                                    Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
-  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
-                              Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
+  a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(),
+                                    Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
-  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
-                             Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
+  a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(),
+                                    Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
 }
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index e1d9fc7..64d4fe2 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -73,7 +73,8 @@
   }
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentNonMovingAllocator();
   mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
-                                                     array_class->GetComponentSize(), allocator);
+                                                     array_class->GetComponentSizeShift(),
+                                                     allocator);
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -95,9 +96,9 @@
     return nullptr;
   }
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
-  mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
-                                                     array_class->GetComponentSize(), allocator,
-                                                     true);
+  mirror::Array* result = mirror::Array::Alloc<true, true>(soa.Self(), array_class, length,
+                                                           array_class->GetComponentSizeShift(),
+                                                           allocator);
   return soa.AddLocalReference<jobject>(result);
 }
 
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 058458f..763a664 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -59,9 +59,10 @@
     return NULL;
   }
   DCHECK(array_class->IsObjectArrayClass());
-  mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
-                                                        sizeof(mirror::HeapReference<mirror::Object>),
-                                                        runtime->GetHeap()->GetCurrentAllocator());
+  mirror::Array* new_array = mirror::Array::Alloc<true>(
+      soa.Self(), array_class, length,
+      ComponentSizeShiftWidth<sizeof(mirror::HeapReference<mirror::Object>)>(),
+      runtime->GetHeap()->GetCurrentAllocator());
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index fc0ccb1..6810d73 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '4', '1', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '4', '2', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 36ad662..afcc64d 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -26,6 +26,24 @@
 
 static constexpr size_t kObjectReferenceSize = 4;
 
+
+template<size_t kComponentSize>
+size_t ComponentSizeShiftWidth() {
+  switch (kComponentSize) {
+    case 1:
+      return 0U;
+    case 2:
+      return 1U;
+    case 4:
+      return 2U;
+    case 8:
+      return 3U;
+    default:
+      LOG(FATAL) << "Unexpected component size : " << kComponentSize;
+      return 0U;
+  }
+}
+
 class Primitive {
  public:
   enum Type {
@@ -66,6 +84,24 @@
     }
   }
 
+  static size_t ComponentSizeShift(Type type) {
+    switch (type) {
+      case kPrimVoid:
+      case kPrimBoolean:
+      case kPrimByte:    return 0;
+      case kPrimChar:
+      case kPrimShort:   return 1;
+      case kPrimInt:
+      case kPrimFloat:   return 2;
+      case kPrimLong:
+      case kPrimDouble:  return 3;
+      case kPrimNot:     return ComponentSizeShiftWidth<kObjectReferenceSize>();
+      default:
+        LOG(FATAL) << "Invalid type " << static_cast<int>(type);
+        return 0;
+    }
+  }
+
   static size_t ComponentSize(Type type) {
     switch (type) {
       case kPrimVoid:    return 0;
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 432a2fe..a14889c 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -89,7 +89,7 @@
   Handle<mirror::Array> h_obj(
       hs.NewHandle(
           mirror::Array::Alloc<true>(soa.Self(), h_klass.Get(), kArraySize,
-                                     h_klass->GetComponentSize(),
+                                     h_klass->GetComponentSizeShift(),
                                      Runtime::Current()->GetHeap()->GetCurrentAllocator())));
   ASSERT_TRUE(h_obj.Get() != nullptr);
   ASSERT_EQ(h_obj->GetClass(), h_klass.Get());