Fix broken runtime SetStatsEnabled logic

Previously, Runtime::SetStatsEnabled wouldn't take stats_enabled_
into account when deciding whether or not to increment / decrement
teh stats enabled counter. This resulted in counter underflows and
other errors which caused some CTS tests to fail.

Also added some locking to prevent race conditions.

Bug: 17360878

(cherry picked from commit a98ffd745bbecb2e84a492194950c0b94966546b)

Change-Id: I21d241a58d35bd6a607aa2305c6da81720bd0886
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b32e042..4bb388f 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -361,6 +361,10 @@
   }
 
   Runtime* runtime = Runtime::Current();
+
+  // Enable count of allocs if specified in the flags.
+  bool enable_stats = false;
+
   runtime->GetThreadList()->SuspendAll();
 
   // Create Trace object.
@@ -369,13 +373,8 @@
     if (the_trace_ != NULL) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
     } else {
+      enable_stats = (flags && kTraceCountAllocs) != 0;
       the_trace_ = new Trace(trace_file.release(), buffer_size, flags, sampling_enabled);
-
-      // Enable count of allocs if specified in the flags.
-      if ((flags && kTraceCountAllocs) != 0) {
-        runtime->SetStatsEnabled(true, true);
-      }
-
       if (sampling_enabled) {
         CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, NULL, &RunSamplingThread,
                                             reinterpret_cast<void*>(interval_us)),
@@ -391,9 +390,15 @@
   }
 
   runtime->GetThreadList()->ResumeAll();
+
+  // Can't call this when holding the mutator lock.
+  if (enable_stats) {
+    runtime->SetStatsEnabled(true);
+  }
 }
 
 void Trace::Stop() {
+  bool stop_alloc_counting = false;
   Runtime* runtime = Runtime::Current();
   runtime->GetThreadList()->SuspendAll();
   Trace* the_trace = NULL;
@@ -409,6 +414,7 @@
     }
   }
   if (the_trace != NULL) {
+    stop_alloc_counting = (the_trace->flags_ & kTraceCountAllocs) != 0;
     the_trace->FinishTracing();
 
     if (the_trace->sampling_enabled_) {
@@ -425,6 +431,11 @@
   }
   runtime->GetThreadList()->ResumeAll();
 
+  if (stop_alloc_counting) {
+    // Can be racy since SetStatsEnabled is not guarded by any locks.
+    Runtime::Current()->SetStatsEnabled(false);
+  }
+
   if (sampling_pthread != 0U) {
     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, NULL), "sampling thread shutdown");
     sampling_pthread_ = 0U;
@@ -489,10 +500,6 @@
 
   size_t final_offset = cur_offset_.LoadRelaxed();
 
-  if ((flags_ & kTraceCountAllocs) != 0) {
-    Runtime::Current()->SetStatsEnabled(false, true);
-  }
-
   std::set<mirror::ArtMethod*> visited_methods;
   GetVisitedMethods(final_offset, &visited_methods);