New arena memory allocator.

Previously we created a new arena for each method, which meant memset-ing each
allocation to zero. The new allocator instead starts from arenas whose memory is
already zeroed and recycles them across methods: when an arena is handed back to
the pool, a single memset zeroes only the bytes that were actually used (see the
sketch below).
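
A minimal sketch of that recycling scheme follows. The Arena/ArenaPool names,
sizes, and data structures here are illustrative assumptions, not the actual
ART classes:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative Arena: a fixed-size block that starts out zeroed and is
// re-zeroed (only as far as it was used) when it goes back to the pool.
class Arena {
 public:
  static constexpr size_t kSize = 128 * 1024;  // size chosen for illustration

  Arena() : memory_(new uint8_t[kSize]()), used_(0) {}  // new[]() zero-initializes
  ~Arena() { delete[] memory_; }

  uint8_t* memory() { return memory_; }
  size_t used() const { return used_; }
  void set_used(size_t used) { used_ = used; }

  // One memset over the bytes actually handed out, instead of a memset per
  // allocation.
  void Reset() {
    memset(memory_, 0, used_);
    used_ = 0;
  }

 private:
  uint8_t* memory_;
  size_t used_;
};

// Illustrative ArenaPool: arenas are recycled across methods and are always
// zeroed by the time AllocArena() hands them out.
class ArenaPool {
 public:
  Arena* AllocArena() {
    if (free_arenas_.empty()) {
      return new Arena();
    }
    Arena* arena = free_arenas_.back();
    free_arenas_.pop_back();
    return arena;  // already zeroed by FreeArena()
  }

  void FreeArena(Arena* arena) {
    arena->Reset();                 // the single memset mentioned above
    free_arenas_.push_back(arena);
  }

 private:
  std::vector<Arena*> free_arenas_;
};

The point of Reset() is that only the used portion of the arena is memset, and
the next method compiled from that arena sees zeroed memory for free.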

Always inlined the fast path of the allocation code.

Removed the "zero" parameter since the new arena allocator always returns
zeroed memory.
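
For illustration, here is a hedged sketch of what the always-inlined fast path
looks like once the zero flag is gone. The Alloc(size, kind) signature and the
enum values mirror the calls in the diff below, but the fields and the
AllocSlowPath() helper are assumptions made for this sketch:

#include <cstddef>
#include <cstdint>

#define UNLIKELY(x) __builtin_expect(!!(x), 0)  // GCC/Clang builtin

class ArenaAllocator {
 public:
  enum ArenaAllocKind { kAllocLIR, kAllocData, kAllocRegAlloc };

  // Fast path: word-align and bump the pointer. No "zero" flag, because the
  // arena's memory is guaranteed to already be zeroed (see the pool sketch).
  __attribute__((always_inline))
  void* Alloc(size_t bytes, ArenaAllocKind kind) {
    bytes = (bytes + 3u) & ~static_cast<size_t>(3u);
    if (UNLIKELY(bytes > static_cast<size_t>(end_ - ptr_))) {
      return AllocSlowPath(bytes, kind);  // refill, kept out of line
    }
    uint8_t* ret = ptr_;
    ptr_ += bytes;
    return ret;
  }

 private:
  // Slow path: grab a fresh zeroed block. (This sketch leaks the old block;
  // the real allocator would hand exhausted arenas back to a pool.)
  void* AllocSlowPath(size_t bytes, ArenaAllocKind /*kind*/) {
    const size_t block_size = bytes > kDefaultBlockSize ? bytes : kDefaultBlockSize;
    ptr_ = new uint8_t[block_size]();  // zero-initialized
    end_ = ptr_ + block_size;
    uint8_t* ret = ptr_;
    ptr_ += bytes;
    return ret;
  }

  static constexpr size_t kDefaultBlockSize = 128 * 1024;
  uint8_t* ptr_ = nullptr;
  uint8_t* end_ = nullptr;
};

Keeping the refill path out of line keeps the inlined fast path down to a bump
of the pointer plus a bounds check.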

Host dex2oat time when compiling the target apks to oat (2 samples each):
Before:
real	1m11.958s
user	4m34.020s
sys	1m28.570s

After:
real	1m9.690s
user	4m17.670s
sys	1m23.960s

Target device dex2oat samples (Mako, Thinkfree.apk):
Without new arena allocator:
0m26.47s real     0m54.60s user     0m25.85s system
0m25.91s real     0m54.39s user     0m26.69s system
0m26.61s real     0m53.77s user     0m27.35s system
0m26.33s real     0m54.90s user     0m25.30s system
0m26.34s real     0m53.94s user     0m27.23s system

With new arena allocator:
0m25.02s real     0m54.46s user     0m19.94s system
0m25.17s real     0m55.06s user     0m20.72s system
0m24.85s real     0m55.14s user     0m19.30s system
0m24.59s real     0m54.02s user     0m20.07s system
0m25.06s real     0m55.00s user     0m20.42s system

Correctness of Thinkfree.apk.oat verified by diffing the oat files produced with and without the new allocator.

Change-Id: I5ff7b85ffe86c57d3434294ca7a621a695bf57a9
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 2d8e24f..2dbe5f5 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -316,13 +316,12 @@
   }
   // Add the table to the list - we'll process it later
   SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true,
-                                               ArenaAllocator::kAllocData));
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   int size = table[1];
-  tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true,
-                                                       ArenaAllocator::kAllocLIR));
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
+                                                      ArenaAllocator::kAllocLIR));
   switch_tables_.Insert(tab_rec);
 
   // Get the switch value
@@ -365,13 +364,12 @@
   }
   // Add the table to the list - we'll process it later
   SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true,
-                                               ArenaAllocator::kAllocData));
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   int size = table[1];
   tab_rec->targets =
-      static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true, ArenaAllocator::kAllocLIR));
+      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR));
   switch_tables_.Insert(tab_rec);
 
   // Get the switch value
@@ -419,8 +417,7 @@
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   // Add the table to the list - we'll process it later
   FillArrayData *tab_rec =
-      static_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true,
-                                                 ArenaAllocator::kAllocData));
+      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   uint16_t width = tab_rec->table[1];
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 47d3d97..6cc3052 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -538,16 +538,14 @@
   int num_temps = sizeof(core_temps)/sizeof(*core_temps);
   int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
   int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true,
-                                                        ArenaAllocator::kAllocRegAlloc));
+  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
+                                                       ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_core_regs = num_regs;
   reg_pool_->core_regs = reinterpret_cast<RegisterInfo*>
-      (arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true,
-                     ArenaAllocator::kAllocRegAlloc));
+      (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_fp_regs = num_fp_regs;
   reg_pool_->FPRegs = static_cast<RegisterInfo*>
-      (arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true,
-                      ArenaAllocator::kAllocRegAlloc));
+      (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), ArenaAllocator::kAllocRegAlloc));
   CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
   CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
   // Keep special registers from being allocated
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5f6f3d5..d89f1ed 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -321,7 +321,7 @@
 LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) {
   /* Add the constant to the literal pool */
   if (constant_list_p) {
-    LIR* new_value = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocData));
+    LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocData));
     new_value->operands[0] = value;
     new_value->next = *constant_list_p;
     *constant_list_p = new_value;
@@ -793,7 +793,7 @@
   if (it == boundary_map_.end()) {
     LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
   }
-  LIR* new_label = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR));
+  LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
   new_label->dalvik_offset = vaddr;
   new_label->opcode = kPseudoCaseLabel;
   new_label->operands[0] = keyVal;
@@ -961,8 +961,8 @@
       first_lir_insn_(NULL),
       last_lir_insn_(NULL) {
   promotion_map_ = static_cast<PromotionMap*>
-      (arena_->NewMem((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
-                      sizeof(promotion_map_[0]), true, ArenaAllocator::kAllocRegAlloc));
+      (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
+                      sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
 }
 
 void Mir2Lir::Materialize() {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 2e9c845..630e990 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -249,7 +249,7 @@
         /* Only sink store instructions */
         if (sink_distance && !is_this_lir_load) {
           LIR* new_store_lir =
-              static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR));
+              static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
           *new_store_lir = *this_lir;
           /*
            * Stop point found - insert *before* the check_lir
@@ -446,7 +446,7 @@
       if (slot >= 0) {
         LIR* cur_lir = prev_inst_list[slot];
         LIR* new_load_lir =
-          static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR));
+          static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
         *new_load_lir = *this_lir;
         /*
          * Insertion is guaranteed to succeed since check_lir
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index eaae0e1..d53c012 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -67,13 +67,12 @@
   }
   // Add the table to the list - we'll process it later
   SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true,
-                                               ArenaAllocator::kAllocData));
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   int elements = table[1];
   tab_rec->targets =
-      static_cast<LIR**>(arena_->NewMem(elements * sizeof(LIR*), true, ArenaAllocator::kAllocLIR));
+      static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), ArenaAllocator::kAllocLIR));
   switch_tables_.Insert(tab_rec);
 
   // The table is composed of 8-byte key/disp pairs
@@ -147,12 +146,11 @@
   }
   // Add the table to the list - we'll process it later
   SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true,
-                                               ArenaAllocator::kAllocData));
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   int size = table[1];
-  tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true,
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
                                                        ArenaAllocator::kAllocLIR));
   switch_tables_.Insert(tab_rec);
 
@@ -228,8 +226,8 @@
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   // Add the table to the list - we'll process it later
   FillArrayData *tab_rec =
-      reinterpret_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true,
-                                                      ArenaAllocator::kAllocData));
+      reinterpret_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData),
+                                                     ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   uint16_t width = tab_rec->table[1];
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 7a9e91a..4ee5b23 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -462,16 +462,14 @@
   int num_temps = sizeof(core_temps)/sizeof(*core_temps);
   int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
   int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true,
-                                                        ArenaAllocator::kAllocRegAlloc));
+  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
+                                                       ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_core_regs = num_regs;
   reg_pool_->core_regs = static_cast<RegisterInfo*>
-     (arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true,
-                     ArenaAllocator::kAllocRegAlloc));
+     (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_fp_regs = num_fp_regs;
   reg_pool_->FPRegs = static_cast<RegisterInfo*>
-      (arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true,
-                      ArenaAllocator::kAllocRegAlloc));
+      (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), ArenaAllocator::kAllocRegAlloc));
   CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
   CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
   // Keep special registers from being allocated
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index d9aef5d..440df2a 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -40,7 +40,7 @@
 
 inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0,
                             int op1, int op2, int op3, int op4, LIR* target) {
-  LIR* insn = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR));
+  LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
   insn->dalvik_offset = dalvik_offset;
   insn->opcode = opcode;
   insn->operands[0] = op0;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 862c1d7..c41feb1 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -812,8 +812,8 @@
 void Mir2Lir::MethodMIR2LIR() {
   // Hold the labels of each block.
   block_label_list_ =
-      static_cast<LIR*>(arena_->NewMem(sizeof(LIR) * mir_graph_->GetNumBlocks(), true,
-                                       ArenaAllocator::kAllocLIR));
+      static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(),
+                                      ArenaAllocator::kAllocLIR));
 
   PreOrderDfsIterator iter(mir_graph_, false /* not iterative */);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index a0b98dd..71b74a4 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -971,11 +971,11 @@
    * to describe register live ranges for GC.
    */
   RefCounts *core_regs =
-      static_cast<RefCounts*>(arena_->NewMem(sizeof(RefCounts) * num_regs, true,
-                                             ArenaAllocator::kAllocRegAlloc));
+      static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+                                            ArenaAllocator::kAllocRegAlloc));
   RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->NewMem(sizeof(RefCounts) * num_regs, true,
-                                              ArenaAllocator::kAllocRegAlloc));
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+                                             ArenaAllocator::kAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
     core_regs[i].s_reg = FpRegs[i].s_reg = i;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 6e3e55f..2be2aa9 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -74,13 +74,12 @@
   }
   // Add the table to the list - we'll process it later
   SwitchTable *tab_rec =
-      static_cast<SwitchTable *>(arena_->NewMem(sizeof(SwitchTable), true,
-                                                ArenaAllocator::kAllocData));
+      static_cast<SwitchTable *>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   int size = table[1];
-  tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true,
-                                                       ArenaAllocator::kAllocLIR));
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
+                                                      ArenaAllocator::kAllocLIR));
   switch_tables_.Insert(tab_rec);
 
   // Get the switch value
@@ -131,8 +130,7 @@
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   // Add the table to the list - we'll process it later
   FillArrayData *tab_rec =
-      static_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true,
-                                                 ArenaAllocator::kAllocData));
+      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData));
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   uint16_t width = tab_rec->table[1];
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 699f3ae..26accab 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -438,16 +438,16 @@
   int num_temps = sizeof(core_temps)/sizeof(*core_temps);
   int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
   int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true,
-                                                        ArenaAllocator::kAllocRegAlloc));
+  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
+                                                       ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_core_regs = num_regs;
   reg_pool_->core_regs =
-      static_cast<RegisterInfo*>(arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true,
-                                                ArenaAllocator::kAllocRegAlloc));
+      static_cast<RegisterInfo*>(arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs),
+                                               ArenaAllocator::kAllocRegAlloc));
   reg_pool_->num_fp_regs = num_fp_regs;
   reg_pool_->FPRegs =
-      static_cast<RegisterInfo *>(arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true,
-                                                 ArenaAllocator::kAllocRegAlloc));
+      static_cast<RegisterInfo *>(arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs),
+                                                ArenaAllocator::kAllocRegAlloc));
   CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
   CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
   // Keep special registers from being allocated