More advanced timing loggers.

The new timing loggers have lower overhead since they only push into
a vector. The new format has two types, a start timing and a stop
timing. You can thing of these as brackets associated with a
timestamp. It uses these to construct various statistics when needed,
such as: Total time, exclusive time, and nesting depth.

Changed PrettyDuration to have a default of 3 digits after the decimal
point.

Exaple of a GC dump with exclusive / total times and indenting:
I/art     (23546): GC iteration timing logger [Exclusive time] [Total time]
I/art     (23546):   0ms InitializePhase
I/art     (23546):   0.305ms/167.746ms MarkingPhase
I/art     (23546):     0ms BindBitmaps
I/art     (23546):     0ms FindDefaultSpaceBitmap
I/art     (23546):     0ms/1.709ms ProcessCards
I/art     (23546):       0.183ms ImageModUnionClearCards
I/art     (23546):       0.916ms ZygoteModUnionClearCards
I/art     (23546):       0.610ms AllocSpaceClearCards
I/art     (23546):       1.373ms AllocSpaceClearCards
I/art     (23546):     0.305ms/6.318ms MarkRoots
I/art     (23546):       2.106ms MarkRootsCheckpoint
I/art     (23546):       0.153ms MarkNonThreadRoots
I/art     (23546):       4.287ms MarkConcurrentRoots
I/art     (23546):     43.461ms UpdateAndMarkImageModUnionTable
I/art     (23546):     0ms/112.712ms RecursiveMark
I/art     (23546):       112.712ms ProcessMarkStack
I/art     (23546):     0.610ms/2.777ms PreCleanCards
I/art     (23546):       0.305ms/0.855ms ProcessCards
I/art     (23546):         0.153ms ImageModUnionClearCards
I/art     (23546):         0.610ms ZygoteModUnionClearCards
I/art     (23546):         0.610ms AllocSpaceClearCards
I/art     (23546):         0.549ms AllocSpaceClearCards
I/art     (23546):       0.549ms MarkRootsCheckpoint
I/art     (23546):       0.610ms MarkNonThreadRoots
I/art     (23546):       0ms MarkConcurrentRoots
I/art     (23546):       0.610ms ScanGrayImageSpaceObjects
I/art     (23546):       0.305ms ScanGrayZygoteSpaceObjects
I/art     (23546):       0.305ms ScanGrayAllocSpaceObjects
I/art     (23546):       1.129ms ScanGrayAllocSpaceObjects
I/art     (23546):       0ms ProcessMarkStack
I/art     (23546):   0ms/0.977ms (Paused)PausePhase
I/art     (23546):     0.244ms ReMarkRoots
I/art     (23546):     0.672ms (Paused)ScanGrayObjects
I/art     (23546):     0ms (Paused)ProcessMarkStack
I/art     (23546):     0ms/0.610ms SwapStacks
I/art     (23546):       0.610ms RevokeAllThreadLocalAllocationStacks
I/art     (23546):     0ms PreSweepingGcVerification
I/art     (23546):   0ms/10.621ms ReclaimPhase
I/art     (23546):     0.610ms/0.702ms ProcessReferences
I/art     (23546):       0.214ms/0.641ms EnqueueFinalizerReferences
I/art     (23546):         0.427ms ProcessMarkStack
I/art     (23546):     0.488ms SweepSystemWeaks
I/art     (23546):     0.824ms/9.400ms Sweep
I/art     (23546):       0ms SweepMallocSpace
I/art     (23546):       0.214ms SweepZygoteSpace
I/art     (23546):       0.122ms SweepMallocSpace
I/art     (23546):       6.226ms SweepMallocSpace
I/art     (23546):       0ms SweepMallocSpace
I/art     (23546):       2.144ms SweepLargeObjects
I/art     (23546):     0.305ms SwapBitmaps
I/art     (23546):     0ms UnBindBitmaps
I/art     (23546):   0.275ms FinishPhase
I/art     (23546): GC iteration timing logger: end, 178.971ms

Change-Id: Ia55b65609468f212b3cd65cda66b843da42be645
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f60f795..e5b8b22 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -468,11 +468,12 @@
                       negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
-std::string PrettyDuration(uint64_t nano_duration) {
+std::string PrettyDuration(uint64_t nano_duration, size_t max_fraction_digits) {
   if (nano_duration == 0) {
     return "0";
   } else {
-    return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration));
+    return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration),
+                          max_fraction_digits);
   }
 }
 
@@ -509,45 +510,41 @@
   return 0;
 }
 
-std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit) {
-  const char* unit = NULL;
+std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit,
+                           size_t max_fraction_digits) {
+  const char* unit = nullptr;
   uint64_t divisor = GetNsToTimeUnitDivisor(time_unit);
-  uint32_t zero_fill = 1;
   switch (time_unit) {
     case kTimeUnitSecond:
       unit = "s";
-      zero_fill = 9;
       break;
     case kTimeUnitMillisecond:
       unit = "ms";
-      zero_fill = 6;
       break;
     case kTimeUnitMicrosecond:
       unit = "us";
-      zero_fill = 3;
       break;
     case kTimeUnitNanosecond:
       unit = "ns";
-      zero_fill = 0;
       break;
   }
-
-  uint64_t whole_part = nano_duration / divisor;
+  const uint64_t whole_part = nano_duration / divisor;
   uint64_t fractional_part = nano_duration % divisor;
   if (fractional_part == 0) {
     return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
-    while ((fractional_part % 1000) == 0) {
-      zero_fill -= 3;
-      fractional_part /= 1000;
+    static constexpr size_t kMaxDigits = 30;
+    char fraction_buffer[kMaxDigits];
+    char* ptr = fraction_buffer;
+    uint64_t multiplier = 10;
+    // This infinite loops if fractional part is 0.
+    while (fractional_part * multiplier < divisor) {
+      multiplier *= 10;
+      *ptr++ = '0';
     }
-    if (zero_fill == 3) {
-      return StringPrintf("%" PRIu64 ".%03" PRIu64 "%s", whole_part, fractional_part, unit);
-    } else if (zero_fill == 6) {
-      return StringPrintf("%" PRIu64 ".%06" PRIu64 "%s", whole_part, fractional_part, unit);
-    } else {
-      return StringPrintf("%" PRIu64 ".%09" PRIu64 "%s", whole_part, fractional_part, unit);
-    }
+    sprintf(ptr, "%" PRIu64, fractional_part);
+    fraction_buffer[std::min(kMaxDigits - 1, max_fraction_digits)] = '\0';
+    return StringPrintf("%" PRIu64 ".%s%s", whole_part, fraction_buffer, unit);
   }
 }