Move BenchTimer to tools as Timer

This breaks a bunch of circular dependencies between tools and gm and bench.

BUG=skia:

Committed: https://skia.googlesource.com/skia/+/4ed75287aed6371c6e4a41ffcc78c8a49c9810ed

CQ_EXTRA_TRYBOTS=tryserver.skia:Build-Mac10.7-Clang-Arm7-Debug-iOS-Trybot,Test-Ubuntu12-ShuttleA-GTX660-x86-Debug-Trybot
R=tfarina@chromium.org, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/344213003
diff --git a/tools/DumpRecord.cpp b/tools/DumpRecord.cpp
index 2376fb9..6e679a5 100644
--- a/tools/DumpRecord.cpp
+++ b/tools/DumpRecord.cpp
@@ -10,8 +10,8 @@
 #include "SkRecord.h"
 #include "SkRecordDraw.h"
 
-#include "BenchTimer.h"
 #include "DumpRecord.h"
+#include "Timer.h"
 
 namespace {
 
@@ -33,7 +33,7 @@
 
     template <typename T>
     void operator()(const T& command) {
-        BenchTimer timer;
+        Timer timer;
         timer.start();
             fDraw(command);
         timer.end();
diff --git a/tools/PictureBenchmark.cpp b/tools/PictureBenchmark.cpp
index 30967c7..85a49f4 100644
--- a/tools/PictureBenchmark.cpp
+++ b/tools/PictureBenchmark.cpp
@@ -5,7 +5,7 @@
  * found in the LICENSE file.
  */
 
-#include "BenchTimer.h"
+#include "Timer.h"
 #include "PictureBenchmark.h"
 #include "SkCanvas.h"
 #include "SkPicture.h"
@@ -42,13 +42,13 @@
     fTimerTypes |= gpu ? TimerData::kGpu_Flag : 0;
 }
 
-BenchTimer* PictureBenchmark::setupTimer(bool useGLTimer) {
+Timer* PictureBenchmark::setupTimer(bool useGLTimer) {
 #if SK_SUPPORT_GPU
     if (useGLTimer && fRenderer != NULL && fRenderer->isUsingGpuDevice()) {
-        return SkNEW_ARGS(BenchTimer, (fRenderer->getGLContext()));
+        return SkNEW_ARGS(Timer, (fRenderer->getGLContext()));
     }
 #endif
-    return SkNEW_ARGS(BenchTimer, (NULL));
+    return SkNEW_ARGS(Timer, (NULL));
 }
 
 PictureRenderer* PictureBenchmark::setRenderer(sk_tools::PictureRenderer* renderer) {
@@ -147,11 +147,11 @@
             // seems to cause problems (i.e., INVALID_OPERATIONs) on several
             // platforms. To work around this, we disable the gpu timer on the
             // long running timer.
-            SkAutoTDelete<BenchTimer> longRunningTimer(this->setupTimer());
+            SkAutoTDelete<Timer> longRunningTimer(this->setupTimer());
             TimerData longRunningTimerData(numOuterLoops);
 
             for (int outer = 0; outer < numOuterLoops; ++outer) {
-                SkAutoTDelete<BenchTimer> perTileTimer(this->setupTimer(false));
+                SkAutoTDelete<Timer> perTileTimer(this->setupTimer(false));
                 TimerData perTileTimerData(numInnerLoops);
 
                 longRunningTimer->start();
@@ -201,11 +201,11 @@
                 numInnerLoops);
         }
     } else {
-        SkAutoTDelete<BenchTimer> longRunningTimer(this->setupTimer());
+        SkAutoTDelete<Timer> longRunningTimer(this->setupTimer());
         TimerData longRunningTimerData(numOuterLoops);
 
         for (int outer = 0; outer < numOuterLoops; ++outer) {
-            SkAutoTDelete<BenchTimer> perRunTimer(this->setupTimer(false));
+            SkAutoTDelete<Timer> perRunTimer(this->setupTimer(false));
             TimerData perRunTimerData(numInnerLoops);
 
             longRunningTimer->start();
diff --git a/tools/PictureBenchmark.h b/tools/PictureBenchmark.h
index 142d526..1ddd18e 100644
--- a/tools/PictureBenchmark.h
+++ b/tools/PictureBenchmark.h
@@ -13,8 +13,8 @@
 #include "SkTypes.h"
 #include "TimerData.h"
 
-class BenchTimer;
 class SkPicture;
+class Timer;
 
 namespace sk_tools {
 
@@ -67,7 +67,7 @@
 
     PictureResultsWriter* fWriter;
 
-    BenchTimer* setupTimer(bool useGLTimer = true);
+    Timer* setupTimer(bool useGLTimer = true);
 };
 
 }
diff --git a/tools/bbh_shootout.cpp b/tools/bbh_shootout.cpp
index e657917..64fc6d8 100644
--- a/tools/bbh_shootout.cpp
+++ b/tools/bbh_shootout.cpp
@@ -5,7 +5,7 @@
  * found in the LICENSE file.
  */
 
-#include "BenchTimer.h"
+#include "Timer.h"
 #include "Benchmark.h"
 #include "LazyDecodeBitmap.h"
 #include "PictureBenchmark.h"
@@ -64,7 +64,7 @@
         BBoxType bBoxType,
         SkPicture* pic,
         const int numRepeats,
-        BenchTimer* timer) {
+        Timer* timer) {
     renderer->setBBoxHierarchyType(bBoxType);
     renderer->setGridSize(FLAGS_tilesize, FLAGS_tilesize);
     renderer->init(pic, NULL, NULL, NULL, false);
@@ -106,14 +106,14 @@
             if (!includeBBoxType[bBoxType]) { continue; }
             if (FLAGS_playback > 0) {
                 sk_tools::TiledPictureRenderer playbackRenderer;
-                BenchTimer playbackTimer;
+                Timer playbackTimer;
                 do_benchmark_work(&playbackRenderer, (BBoxType)bBoxType,
                                   picture, FLAGS_playback, &playbackTimer);
                 measurement.fPlaybackAverage[bBoxType] = playbackTimer.fCpu;
             }
             if (FLAGS_record > 0) {
                 sk_tools::RecordPictureRenderer recordRenderer;
-                BenchTimer recordTimer;
+                Timer recordTimer;
                 do_benchmark_work(&recordRenderer, (BBoxType)bBoxType,
                                   picture, FLAGS_record, &recordTimer);
                 measurement.fRecordAverage[bBoxType] = recordTimer.fCpu;
diff --git a/tools/bench_pictures_main.cpp b/tools/bench_pictures_main.cpp
index d9b767b..503269af 100644
--- a/tools/bench_pictures_main.cpp
+++ b/tools/bench_pictures_main.cpp
@@ -6,7 +6,7 @@
  */
 
 #include "BenchLogger.h"
-#include "BenchTimer.h"
+#include "Timer.h"
 #include "CopyTilesRenderer.h"
 #include "CrashHandler.h"
 #include "LazyDecodeBitmap.h"
diff --git a/tools/bench_playback.cpp b/tools/bench_playback.cpp
index 26fa1c7..6ebe19d 100644
--- a/tools/bench_playback.cpp
+++ b/tools/bench_playback.cpp
@@ -16,10 +16,8 @@
 
 #include "../include/record/SkRecording.h"
 
-#include "BenchTimer.h"
 #include "Stats.h"
-
-typedef WallTimer Timer;
+#include "Timer.h"
 
 __SK_FORCE_IMAGE_DECODER_LINKING;
 
@@ -78,15 +76,16 @@
     // Draw once to warm any caches.  The first sample otherwise can be very noisy.
     draw(*record, *picture, canvas.get());
 
-    Timer timer;
+    WallTimer timer;
+    const double scale = timescale();
     SkAutoTMalloc<double> samples(FLAGS_samples);
     for (int i = 0; i < FLAGS_samples; i++) {
         // We assume timer overhead (typically, ~30ns) is insignificant
         // compared to draw runtime (at least ~100us, usually several ms).
-        timer.start(timescale());
+        timer.start();
         draw(*record, *picture, canvas.get());
         timer.end();
-        samples[i] = timer.fWall;
+        samples[i] = timer.fWall * scale;
     }
 
     Stats stats(samples.get(), FLAGS_samples);
diff --git a/tools/bench_record.cpp b/tools/bench_record.cpp
index 0024c2c..5457319 100644
--- a/tools/bench_record.cpp
+++ b/tools/bench_record.cpp
@@ -14,11 +14,9 @@
 #include "SkStream.h"
 #include "SkString.h"
 
-#include "BenchTimer.h"
 #include "LazyDecodeBitmap.h"
 #include "Stats.h"
-
-typedef WallTimer Timer;
+#include "Timer.h"
 
 __SK_FORCE_IMAGE_DECODER_LINKING;
 
@@ -81,12 +79,13 @@
     rerecord(src, bbhFactory);
 
     // Rerecord once to see how many times we should loop to make timer overhead insignificant.
-    Timer timer;
+    WallTimer timer;
+    const double scale = timescale();
     do {
-        timer.start(timescale());
+        timer.start();
         rerecord(src, bbhFactory);
         timer.end();
-    } while (timer.fWall < timerOverhead);   // Loop just in case something bizarre happens.
+    } while (timer.fWall * scale < timerOverhead);  // Loop just in case something bizarre happens.
 
     // We want (timer overhead / measurement) to be less than FLAGS_overheadGoal.
     // So in each sample, we'll loop enough times to have made that true for our first measurement.
@@ -94,12 +93,12 @@
 
     SkAutoTMalloc<double> samples(FLAGS_samples);
     for (int i = 0; i < FLAGS_samples; i++) {
-        timer.start(timescale());
+        timer.start();
         for (int j = 0; j < loops; j++) {
             rerecord(src, bbhFactory);
         }
         timer.end();
-        samples[i] = timer.fWall / loops;
+        samples[i] = timer.fWall * scale / loops;
     }
 
     Stats stats(samples.get(), FLAGS_samples);
@@ -132,12 +131,13 @@
 
     // Each run will use this timer overhead estimate to guess how many times it should run.
     static const int kOverheadLoops = 10000000;
-    Timer timer;
+    WallTimer timer;
     double overheadEstimate = 0.0;
+    const double scale = timescale();
     for (int i = 0; i < kOverheadLoops; i++) {
-        timer.start(timescale());
+        timer.start();
         timer.end();
-        overheadEstimate += timer.fWall;
+        overheadEstimate += timer.fWall * scale;
     }
     overheadEstimate /= kOverheadLoops;
 
diff --git a/tools/skpdiff/skpdiff_util.cpp b/tools/skpdiff/skpdiff_util.cpp
index 171721c..983bee2 100644
--- a/tools/skpdiff/skpdiff_util.cpp
+++ b/tools/skpdiff/skpdiff_util.cpp
@@ -83,7 +83,7 @@
 }
 #endif
 
-// TODO refactor BenchTimer to be used here
+// TODO refactor Timer to be used here
 double get_seconds() {
 #if SK_BUILD_FOR_WIN32
     LARGE_INTEGER currentTime;
diff --git a/tools/timer/GpuTimer.cpp b/tools/timer/GpuTimer.cpp
new file mode 100644
index 0000000..aac10a3
--- /dev/null
+++ b/tools/timer/GpuTimer.cpp
@@ -0,0 +1,77 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "GpuTimer.h"
+#include "gl/SkGLContextHelper.h"
+#include "gl/GrGLUtil.h"
+
+GpuTimer::GpuTimer(const SkGLContextHelper* glctx)  // A NULL glctx yields an inert timer.
+    : fQuery(0), fStarted(false), fContext(glctx), fSupported(false) {  // Declaration order.
+    if (fContext) {
+        fContext->ref();  // Balanced by unref() in the destructor.
+        fContext->makeCurrent();
+        fSupported = GrGLGetVersion(fContext->gl()) > GR_GL_VER(3,3) ||
+                     fContext->hasExtension("GL_ARB_timer_query") ||
+                     fContext->hasExtension("GL_EXT_timer_query");
+        // Create the query object only when timer queries are available.
+        if (fSupported) {
+            SK_GL(*fContext, GenQueries(1, &fQuery));
+        }
+    }
+}
+
+GpuTimer::~GpuTimer() {
+    if (fContext) {
+        if (fSupported) {
+            fContext->makeCurrent();
+            SK_GL(*fContext, DeleteQueries(1, &fQuery));
+        }
+        fContext->unref();
+    }
+}
+
+void GpuTimer::start() {
+    if (fContext && fSupported) {
+        fContext->makeCurrent();
+        fStarted = true;
+        SK_GL(*fContext, BeginQuery(GR_GL_TIME_ELAPSED, fQuery));
+    }
+}
+
+/**
+ * Stop the CPU clocks before calling this: it polls on the CPU until the GPU
+ * query result is available. Returns elapsed GPU time in ms (0 if unsupported).
+ */
+double GpuTimer::end() {
+    if (fContext && fSupported) {
+        fStarted = false;
+        fContext->makeCurrent();
+        SK_GL(*fContext, EndQuery(GR_GL_TIME_ELAPSED));
+
+        GrGLint available = 0;
+        while (!available) {
+            SK_GL_NOERRCHECK(*fContext, GetQueryObjectiv(fQuery,
+                                                         GR_GL_QUERY_RESULT_AVAILABLE,
+                                                         &available));
+            // If GetQueryObjectiv is erroring out we need some alternative
+            // means of breaking out of this loop
+            GrGLenum error;
+            SK_GL_RET_NOERRCHECK(*fContext, error, GetError());
+            if (GR_GL_NO_ERROR != error) {
+                break;
+            }
+        }
+        GrGLuint64 totalGPUTimeElapsed = 0;
+        SK_GL(*fContext, GetQueryObjectui64v(fQuery,
+                                             GR_GL_QUERY_RESULT,
+                                             &totalGPUTimeElapsed));
+
+        return totalGPUTimeElapsed / 1000000.0;
+    } else {
+        return 0;
+    }
+}
diff --git a/tools/timer/GpuTimer.h b/tools/timer/GpuTimer.h
new file mode 100644
index 0000000..2100312
--- /dev/null
+++ b/tools/timer/GpuTimer.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef GpuTimer_DEFINED
+#define GpuTimer_DEFINED
+
+class SkGLContextHelper;
+
+class GpuTimer {
+public:
+    GpuTimer(const SkGLContextHelper*);
+    ~GpuTimer();
+    void start();
+    double end();
+private:
+    unsigned fQuery;
+    int fStarted;
+    const SkGLContextHelper* fContext;
+    bool fSupported;
+};
+
+#endif
diff --git a/tools/timer/SysTimer_mach.cpp b/tools/timer/SysTimer_mach.cpp
new file mode 100644
index 0000000..aca12de
--- /dev/null
+++ b/tools/timer/SysTimer_mach.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SysTimer_mach.h"
+
+static time_value_t mac_cpu_time() {  // user + system CPU time from task_info(); {0, 0} on error
+    mach_port_t task = mach_task_self();
+    if (task == MACH_PORT_NULL) {
+        time_value_t none = {0, 0};
+        return none;
+    }
+
+    task_thread_times_info thread_info_data;
+    mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
+    if (KERN_SUCCESS != task_info(task,
+                                  TASK_THREAD_TIMES_INFO,
+                                  reinterpret_cast<task_info_t>(&thread_info_data),
+                                  &thread_info_count)) {
+        time_value_t none = {0, 0};
+        return none;
+    }
+    // Fold system_time into user_time so the returned value covers both.
+    time_value_add(&thread_info_data.user_time, &thread_info_data.system_time);
+    return thread_info_data.user_time;
+}
+
+static double interval_in_ms(time_value_t start_clock, time_value_t end_clock) {
+    double duration_clock;
+    if ((end_clock.microseconds - start_clock.microseconds) < 0) {
+        duration_clock = (end_clock.seconds - start_clock.seconds-1) * 1000;
+        duration_clock += (1000000 + end_clock.microseconds - start_clock.microseconds) / 1000.0;
+    } else {
+        duration_clock = (end_clock.seconds - start_clock.seconds) * 1000;
+        duration_clock += (end_clock.microseconds - start_clock.microseconds) / 1000.0;
+    }
+    return duration_clock;
+}
+
+void SysTimer::startWall() {
+    fStartWall = mach_absolute_time();
+}
+
+void SysTimer::startCpu() {
+    fStartCpu = mac_cpu_time();
+}
+
+double SysTimer::endCpu() {
+    time_value_t end_cpu = mac_cpu_time();
+    return interval_in_ms(fStartCpu, end_cpu);
+}
+
+double SysTimer::endWall() {
+    uint64_t end_wall = mach_absolute_time();
+
+    uint64_t elapsed = end_wall - fStartWall;
+    mach_timebase_info_data_t sTimebaseInfo;
+    if (KERN_SUCCESS != mach_timebase_info(&sTimebaseInfo)) {
+        return 0;
+    } else {
+        uint64_t elapsedNano = elapsed * sTimebaseInfo.numer / sTimebaseInfo.denom;
+        return elapsedNano / 1000000.0;
+    }
+}
diff --git a/tools/timer/SysTimer_mach.h b/tools/timer/SysTimer_mach.h
new file mode 100644
index 0000000..8c21d57
--- /dev/null
+++ b/tools/timer/SysTimer_mach.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SysTimer_DEFINED
+#define SysTimer_DEFINED
+
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+class SysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    time_value_t fStartCpu;
+    uint64_t fStartWall;
+};
+
+#endif
diff --git a/tools/timer/SysTimer_posix.cpp b/tools/timer/SysTimer_posix.cpp
new file mode 100644
index 0000000..4b7d708
--- /dev/null
+++ b/tools/timer/SysTimer_posix.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SysTimer_posix.h"
+
+static double interval_in_ms(timespec start_clock, timespec end_clock)
+{
+    double duration_clock;
+    if ((end_clock.tv_nsec - start_clock.tv_nsec) < 0) {
+        duration_clock = (end_clock.tv_sec - start_clock.tv_sec - 1) * 1000;
+        duration_clock += (1000000000 + end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0;
+    } else {
+        duration_clock = (end_clock.tv_sec - start_clock.tv_sec) * 1000;
+        duration_clock += (end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0;
+    }
+    return duration_clock;
+}
+
+void SysTimer::startWall() {
+    if (-1 == clock_gettime(CLOCK_MONOTONIC, &fWall)) {
+        timespec none = {0, 0};
+        fWall = none;
+    }
+}
+void SysTimer::startCpu() {
+    if (-1 == clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &fCpu)) {
+        timespec none = {0, 0};
+        fCpu = none;
+    }
+}
+
+double SysTimer::endCpu() {
+    timespec end_cpu;
+    if (-1 == clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_cpu)) {
+        timespec none = {0, 0};
+        end_cpu = none;
+    }
+    return interval_in_ms(fCpu, end_cpu);
+}
+
+double SysTimer::endWall() {
+    timespec end_wall;
+    if (-1 == clock_gettime(CLOCK_MONOTONIC, &end_wall)) {
+        timespec none = {0, 0};
+        end_wall = none;
+    }
+    return interval_in_ms(fWall, end_wall);
+}
diff --git a/tools/timer/SysTimer_posix.h b/tools/timer/SysTimer_posix.h
new file mode 100644
index 0000000..1eca909
--- /dev/null
+++ b/tools/timer/SysTimer_posix.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SysTimer_DEFINED
+#define SysTimer_DEFINED
+
+#include <time.h>
+
+class SysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    timespec fCpu;
+    timespec fWall;
+};
+
+#endif
diff --git a/tools/timer/SysTimer_windows.cpp b/tools/timer/SysTimer_windows.cpp
new file mode 100644
index 0000000..2f9d0a5
--- /dev/null
+++ b/tools/timer/SysTimer_windows.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SysTimer_windows.h"
+
+static ULONGLONG win_cpu_time() {
+    FILETIME createTime;
+    FILETIME exitTime;
+    FILETIME usrTime;
+    FILETIME sysTime;
+    if (0 == GetProcessTimes(GetCurrentProcess(), &createTime, &exitTime, &sysTime, &usrTime)) {
+        return 0;
+    }
+    ULARGE_INTEGER start_cpu_sys;
+    ULARGE_INTEGER start_cpu_usr;
+    start_cpu_sys.LowPart  = sysTime.dwLowDateTime;
+    start_cpu_sys.HighPart = sysTime.dwHighDateTime;
+    start_cpu_usr.LowPart  = usrTime.dwLowDateTime;
+    start_cpu_usr.HighPart = usrTime.dwHighDateTime;
+    return start_cpu_sys.QuadPart + start_cpu_usr.QuadPart;
+}
+
+void SysTimer::startWall() {
+    if (0 == ::QueryPerformanceCounter(&fStartWall)) {
+        fStartWall.QuadPart = 0;
+    }
+}
+void SysTimer::startCpu() {
+    fStartCpu = win_cpu_time();
+}
+
+double SysTimer::endCpu() {
+    ULONGLONG end_cpu = win_cpu_time();
+    return static_cast<double>(end_cpu - fStartCpu) / 10000.0L;
+}
+double SysTimer::endWall() {
+    LARGE_INTEGER end_wall;
+    if (0 == ::QueryPerformanceCounter(&end_wall)) {
+        end_wall.QuadPart = 0;
+    }
+
+    LARGE_INTEGER ticks_elapsed;
+    ticks_elapsed.QuadPart = end_wall.QuadPart - fStartWall.QuadPart;
+
+    LARGE_INTEGER frequency;
+    if (0 == ::QueryPerformanceFrequency(&frequency)) {
+        return 0.0L;
+    } else {
+        return static_cast<double>(ticks_elapsed.QuadPart)
+             / static_cast<double>(frequency.QuadPart)
+             * 1000.0L;
+    }
+}
diff --git a/tools/timer/SysTimer_windows.h b/tools/timer/SysTimer_windows.h
new file mode 100644
index 0000000..62a9445
--- /dev/null
+++ b/tools/timer/SysTimer_windows.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SysTimer_DEFINED
+#define SysTimer_DEFINED
+
+//Time
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+
+class SysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    ULONGLONG fStartCpu;
+    LARGE_INTEGER fStartWall;
+};
+
+#endif
diff --git a/tools/timer/Timer.cpp b/tools/timer/Timer.cpp
new file mode 100644
index 0000000..4f3fc85
--- /dev/null
+++ b/tools/timer/Timer.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "Timer.h"
+
+Timer::Timer(SkGLContextHelper* gl)
+        : fCpu(-1.0)
+        , fWall(-1.0)
+        , fTruncatedCpu(-1.0)
+        , fTruncatedWall(-1.0)
+        , fGpu(-1.0)
+#if SK_SUPPORT_GPU
+        , fGpuTimer(gl)
+#endif
+        {}
+
+void Timer::start() {
+    fSysTimer.startWall();
+    fTruncatedSysTimer.startWall();
+#if SK_SUPPORT_GPU
+    fGpuTimer.start();
+#endif
+    fSysTimer.startCpu();
+    fTruncatedSysTimer.startCpu();
+}
+
+void Timer::end() {
+    fCpu = fSysTimer.endCpu();
+#if SK_SUPPORT_GPU
+    //It is important to stop the cpu clocks first,
+    //as the following will cpu wait for the gpu to finish.
+    fGpu = fGpuTimer.end();
+#endif
+    fWall = fSysTimer.endWall();
+}
+
+void Timer::truncatedEnd() {
+    fTruncatedCpu = fTruncatedSysTimer.endCpu();
+    fTruncatedWall = fTruncatedSysTimer.endWall();
+}
+
+WallTimer::WallTimer() : fWall(-1.0) {}
+
+void WallTimer::start() {
+    fSysTimer.startWall();
+}
+
+void WallTimer::end() {
+    fWall = fSysTimer.endWall();
+}
diff --git a/tools/timer/Timer.h b/tools/timer/Timer.h
new file mode 100644
index 0000000..15c93f5
--- /dev/null
+++ b/tools/timer/Timer.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef Timer_DEFINED
+#define Timer_DEFINED
+
+#include "SkTypes.h"
+
+#if defined(SK_BUILD_FOR_WIN32)
+    #include "SysTimer_windows.h"
+#elif defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS)
+    #include "SysTimer_mach.h"
+#elif defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_ANDROID)
+    #include "SysTimer_posix.h"
+#endif
+
+#if SK_SUPPORT_GPU
+    #include "GpuTimer.h"
+#endif
+
+class SkGLContextHelper;
+
+/**
+ * SysTimers and GpuTimers are implemented orthogonally.
+ * This class combines 2 SysTimers and a GpuTimer into one single,
+ * platform specific Timer with a simple interface. The truncated
+ * timer doesn't include the time required for the GPU to finish
+ * its rendering. It should always be <= the un-truncated system
+ * times and (for GPU configurations) can be used to roughly (very
+ * roughly) gauge the GPU load/backlog.
+ */
+class Timer {
+public:
+    explicit Timer(SkGLContextHelper* gl = NULL);
+
+    void start();
+    void truncatedEnd();
+    void end();
+
+    // All times in milliseconds.
+    double fCpu;
+    double fWall;
+    double fTruncatedCpu;
+    double fTruncatedWall;
+    double fGpu;
+
+private:
+    SysTimer fSysTimer;
+    SysTimer fTruncatedSysTimer;
+#if SK_SUPPORT_GPU
+    GpuTimer fGpuTimer;
+#endif
+};
+
+// Same as Timer above, supporting only fWall but with much lower overhead.
+// (Typically, ~30ns instead of Timer's ~1us.)
+class WallTimer {
+public:
+    WallTimer();
+
+    void start();
+    void end();
+
+    double fWall;  // Milliseconds.
+
+private:
+    SysTimer fSysTimer;
+};
+
+#endif
diff --git a/tools/timer/TimerData.cpp b/tools/timer/TimerData.cpp
new file mode 100644
index 0000000..21529bc
--- /dev/null
+++ b/tools/timer/TimerData.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "TimerData.h"
+
+#include "Timer.h"
+#include <limits>
+
+TimerData::TimerData(int maxNumTimings)
+    : fMaxNumTimings(maxNumTimings)
+    , fCurrTiming(0)
+    , fWallTimes(maxNumTimings)
+    , fTruncatedWallTimes(maxNumTimings)
+    , fCpuTimes(maxNumTimings)
+    , fTruncatedCpuTimes(maxNumTimings)
+    , fGpuTimes(maxNumTimings) {}
+
+bool TimerData::appendTimes(Timer* timer) {
+    SkASSERT(timer != NULL);
+    if (fCurrTiming >= fMaxNumTimings) {
+        return false;
+    }
+
+    fWallTimes[fCurrTiming] = timer->fWall;
+    fTruncatedWallTimes[fCurrTiming] = timer->fTruncatedWall;
+    fCpuTimes[fCurrTiming] = timer->fCpu;
+    fTruncatedCpuTimes[fCurrTiming] = timer->fTruncatedCpu;
+    fGpuTimes[fCurrTiming] = timer->fGpu;
+
+    ++fCurrTiming;
+
+    return true;
+}
+
+SkString TimerData::getResult(const char* doubleFormat,
+                              Result result,
+                              const char *configName,
+                              uint32_t timerFlags,
+                              int itersPerTiming) {
+    SkASSERT(itersPerTiming >= 1);
+
+    if (!fCurrTiming) {
+        return SkString("");
+    }
+
+    int numTimings = fCurrTiming;
+
+    SkString wallStr(" msecs = ");
+    SkString truncWallStr(" Wmsecs = ");
+    SkString cpuStr(" cmsecs = ");
+    SkString truncCpuStr(" Cmsecs = ");
+    SkString gpuStr(" gmsecs = ");
+
+    double wallMin = std::numeric_limits<double>::max();
+    double truncWallMin = std::numeric_limits<double>::max();
+    double cpuMin = std::numeric_limits<double>::max();
+    double truncCpuMin = std::numeric_limits<double>::max();
+    double gpuMin = std::numeric_limits<double>::max();
+
+    double wallSum = 0;
+    double truncWallSum = 0;
+    double cpuSum = 0;
+    double truncCpuSum = 0;
+    double gpuSum = 0;
+
+    for (int i = 0; i < numTimings; ++i) {
+        if (kPerIter_Result == result) {
+            wallStr.appendf(doubleFormat, fWallTimes[i] / itersPerTiming);
+            truncWallStr.appendf(doubleFormat, fTruncatedWallTimes[i] / itersPerTiming);
+            cpuStr.appendf(doubleFormat, fCpuTimes[i] / itersPerTiming);
+            truncCpuStr.appendf(doubleFormat, fTruncatedCpuTimes[i] / itersPerTiming);
+            gpuStr.appendf(doubleFormat, fGpuTimes[i] / itersPerTiming);
+
+            if (i != numTimings - 1) {
+                static const char kSep[] = ", ";
+                wallStr.append(kSep);
+                truncWallStr.append(kSep);
+                cpuStr.append(kSep);
+                truncCpuStr.append(kSep);
+                gpuStr.append(kSep);
+            }
+        } else if (kMin_Result == result) {
+            wallMin = SkTMin(wallMin, fWallTimes[i]);
+            truncWallMin = SkTMin(truncWallMin, fTruncatedWallTimes[i]);
+            cpuMin = SkTMin(cpuMin, fCpuTimes[i]);
+            truncCpuMin = SkTMin(truncCpuMin, fTruncatedCpuTimes[i]);
+            gpuMin = SkTMin(gpuMin, fGpuTimes[i]);
+        } else {
+            SkASSERT(kAvg_Result == result);
+            wallSum += fWallTimes[i];
+            truncWallSum += fTruncatedWallTimes[i];
+            cpuSum += fCpuTimes[i];
+            truncCpuSum += fTruncatedCpuTimes[i];
+        }
+
+        // We always track the GPU sum because whether it is non-zero indicates if valid gpu times
+        // were recorded at all.
+        gpuSum += fGpuTimes[i];
+    }
+
+    if (kMin_Result == result) {
+        wallStr.appendf(doubleFormat, wallMin / itersPerTiming);
+        truncWallStr.appendf(doubleFormat, truncWallMin / itersPerTiming);
+        cpuStr.appendf(doubleFormat, cpuMin / itersPerTiming);
+        truncCpuStr.appendf(doubleFormat, truncCpuMin / itersPerTiming);
+        gpuStr.appendf(doubleFormat, gpuMin / itersPerTiming);
+    } else if (kAvg_Result == result) {
+        int divisor = numTimings * itersPerTiming;
+        wallStr.appendf(doubleFormat, wallSum / divisor);
+        truncWallStr.appendf(doubleFormat, truncWallSum / divisor);
+        cpuStr.appendf(doubleFormat, cpuSum / divisor);
+        truncCpuStr.appendf(doubleFormat, truncCpuSum / divisor);
+        gpuStr.appendf(doubleFormat, gpuSum / divisor);
+    }
+
+    SkString str;
+    str.printf("  %4s:", configName);
+    if (timerFlags & kWall_Flag) {
+        str += wallStr;
+    }
+    if (timerFlags & kTruncatedWall_Flag) {
+        str += truncWallStr;
+    }
+    if (timerFlags & kCpu_Flag) {
+        str += cpuStr;
+    }
+    if (timerFlags & kTruncatedCpu_Flag) {
+        str += truncCpuStr;
+    }
+    if ((timerFlags & kGpu_Flag) && gpuSum > 0) {
+        str += gpuStr;
+    }
+    return str;
+}
+
+Json::Value TimerData::getJSON(uint32_t timerFlags,
+                               Result result,
+                               int itersPerTiming) {
+    SkASSERT(itersPerTiming >= 1);
+    Json::Value dataNode;
+    Json::Value wallNode, truncWall, cpuNode, truncCpu, gpuNode;
+    if (!fCurrTiming) {
+        return dataNode;
+    }
+
+    int numTimings = fCurrTiming;
+
+    double wallMin = std::numeric_limits<double>::max();
+    double truncWallMin = std::numeric_limits<double>::max();
+    double cpuMin = std::numeric_limits<double>::max();
+    double truncCpuMin = std::numeric_limits<double>::max();
+    double gpuMin = std::numeric_limits<double>::max();
+
+    double wallSum = 0;
+    double truncWallSum = 0;
+    double cpuSum = 0;
+    double truncCpuSum = 0;
+    double gpuSum = 0;
+
+    for (int i = 0; i < numTimings; ++i) {
+        if (kPerIter_Result == result) {
+            wallNode.append(fWallTimes[i] / itersPerTiming);
+            truncWall.append(fTruncatedWallTimes[i] / itersPerTiming);
+            cpuNode.append(fCpuTimes[i] / itersPerTiming);
+            truncCpu.append(fTruncatedCpuTimes[i] / itersPerTiming);
+            gpuNode.append(fGpuTimes[i] / itersPerTiming);
+        } else if (kMin_Result == result) {
+            wallMin = SkTMin(wallMin, fWallTimes[i]);
+            truncWallMin = SkTMin(truncWallMin, fTruncatedWallTimes[i]);
+            cpuMin = SkTMin(cpuMin, fCpuTimes[i]);
+            truncCpuMin = SkTMin(truncCpuMin, fTruncatedCpuTimes[i]);
+            gpuMin = SkTMin(gpuMin, fGpuTimes[i]);
+        } else {
+            SkASSERT(kAvg_Result == result);
+            wallSum += fWallTimes[i];
+            truncWallSum += fTruncatedWallTimes[i];
+            cpuSum += fCpuTimes[i];
+            truncCpuSum += fTruncatedCpuTimes[i];
+        }
+
+        // We always track the GPU sum because whether it is non-zero indicates if valid gpu times
+        // were recorded at all.
+        gpuSum += fGpuTimes[i];
+    }
+
+    if (kMin_Result == result) {
+        wallNode.append(wallMin / itersPerTiming);
+        truncWall.append(truncWallMin / itersPerTiming);
+        cpuNode.append(cpuMin / itersPerTiming);
+        truncCpu.append(truncCpuMin / itersPerTiming);
+        gpuNode.append(gpuMin / itersPerTiming);
+    } else if (kAvg_Result == result) {
+        int divisor = numTimings * itersPerTiming;
+        wallNode.append(wallSum / divisor);
+        truncWall.append(truncWallSum / divisor);
+        cpuNode.append(cpuSum / divisor);
+        truncCpu.append(truncCpuSum / divisor);
+        gpuNode.append(gpuSum / divisor);
+    }
+
+    if (timerFlags & kWall_Flag) {
+        dataNode["wall"] = wallNode;
+    }
+    if (timerFlags & kTruncatedWall_Flag) {
+        dataNode["truncWall"] = truncWall;
+    }
+    if (timerFlags & kCpu_Flag) {
+        dataNode["cpu"] = cpuNode;
+    }
+    if (timerFlags & kTruncatedCpu_Flag) {
+        dataNode["trucCpu"] = truncCpu;
+    }
+    if ((timerFlags & kGpu_Flag) && gpuSum > 0) {
+        dataNode["gpu"] = gpuNode;
+    }
+    return dataNode;
+}
diff --git a/tools/timer/TimerData.h b/tools/timer/TimerData.h
new file mode 100644
index 0000000..35e94dc
--- /dev/null
+++ b/tools/timer/TimerData.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef TimerData_DEFINED
+#define TimerData_DEFINED
+
+#include "SkString.h"
+#include "SkTemplates.h"
+
+#ifdef SK_BUILD_FOR_WIN
+    #pragma warning(push)
+    #pragma warning(disable : 4530)
+#endif
+
+#include "SkJSONCPP.h"
+
+#ifdef SK_BUILD_FOR_WIN
+    #pragma warning(pop)
+#endif
+
+class Timer;
+
+class TimerData {
+public:
+    /**
+     * Constructs a TimerData to hold at most maxNumTimings sets of elapsed timer values.
+     **/
+    explicit TimerData(int maxNumTimings);
+
+    /**
+     * Collect times from the Timer for an iteration. It will fail if called more often than
+     * indicated in the constructor.
+     *
+     * @param Timer Must not be null.
+     */
+    bool appendTimes(Timer*);
+
+    enum Result {
+        kMin_Result,
+        kAvg_Result,
+        kPerIter_Result
+    };
+
+    enum TimerFlags {
+        kWall_Flag              = 0x1,
+        kTruncatedWall_Flag     = 0x2,
+        kCpu_Flag               = 0x4,
+        kTruncatedCpu_Flag      = 0x8,
+        kGpu_Flag               = 0x10
+    };
+
+    /**
+     * Gets the timer data results as a string.
+     * @param doubleFormat printf-style format for doubles (e.g. "%.2f")
+     * @param result the type of result desired
+     * @param configName the name of the config being timed (prepended to results string)
+     * @param timerFlags bitfield of TimerFlags values indicating which timers should be reported.
+     * @param itersPerTiming the number of test/bench iterations that correspond to each
+     *        appendTimes() call, 1 when appendTimes is called for each iteration.
+     */
+    SkString getResult(const char* doubleFormat,
+                       Result result,
+                       const char* configName,
+                       uint32_t timerFlags,
+                       int itersPerTiming = 1);
+    Json::Value getJSON(uint32_t timerFlags,
+                        Result result,
+                        int itersPerTiming = 1);
+
+private:
+    int fMaxNumTimings;
+    int fCurrTiming;
+
+    SkAutoTArray<double> fWallTimes;
+    SkAutoTArray<double> fTruncatedWallTimes;
+    SkAutoTArray<double> fCpuTimes;
+    SkAutoTArray<double> fTruncatedCpuTimes;
+    SkAutoTArray<double> fGpuTimes;
+};
+
+#endif // TimerData_DEFINED