Higher resolution timers for bench.
http://codereview.appspot.com/4548090/


git-svn-id: http://skia.googlecode.com/svn/trunk@1534 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/Makefile.old b/Makefile.old
index 7881dfa..c78c101 100644
--- a/Makefile.old
+++ b/Makefile.old
@@ -133,7 +133,7 @@
 	endif
 	
 else
-	LINKER_OPTS += -lpng -lfreetype
+	LINKER_OPTS += -lpng -lfreetype -lrt
 	DEFINES += -DSK_BUILD_FOR_UNIX -DSK_ENABLE_LIBPNG -DGR_LINUX_BUILD=1
 	ifeq ($(SKIA_MESA),true)
 		LINKER_OPTS += -lOSMesa -lGLU
@@ -203,8 +203,15 @@
 ##############################################################################
 
 BENCH_SRCS := RectBench.cpp SkBenchmark.cpp benchmain.cpp BitmapBench.cpp \
-			  RepeatTileBench.cpp DecodeBench.cpp FPSBench.cpp PathBench.cpp \
-			  GradientBench.cpp MatrixBench.cpp ScalarBench.cpp
+		RepeatTileBench.cpp DecodeBench.cpp FPSBench.cpp PathBench.cpp \
+		GradientBench.cpp MatrixBench.cpp ScalarBench.cpp \
+		BenchTimer.cpp BenchGpuTimer_gl.cpp
+ 
+ifeq ($(SKIA_BUILD_FOR),mac)
+    BENCH_SRCS += BenchSysTimer_mach.cpp
+else
+    BENCH_SRCS += BenchSysTimer_posix.cpp
+endif
 
 BENCH_SRCS := $(addprefix bench/, $(BENCH_SRCS))
 
@@ -318,4 +325,5 @@
 	@echo "    SKIA_SCALAR=fixed for fixed-point build"
 	@echo "    SKIA_BUILD_FOR=mac for mac build (e.g. CG for image decoding)"
 	@echo "    SKIA_PDF_SUPPORT=false to disable the pdf generation backend"
+	@echo "    SKIA_MESA=true to build with osmesa instead of native GL."
 	@echo ""
diff --git a/bench/BenchGpuTimer_gl.cpp b/bench/BenchGpuTimer_gl.cpp
new file mode 100644
index 0000000..ec2145d
--- /dev/null
+++ b/bench/BenchGpuTimer_gl.cpp
@@ -0,0 +1,181 @@
+#include "BenchGpuTimer_gl.h"
+#include <string.h>
+
+//GL
+#define BENCH_GL_FUNCTION_TYPE
+#if defined(SK_MESA)
+    #include <GL/osmesa.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != OSMesaGetCurrentContext())
+    
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            OSMesaGetProcAddress("gl" #F);
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            OSMesaGetProcAddress("gl" #F #S);
+
+#elif defined(SK_BUILD_FOR_WIN32)
+    #define WIN32_LEAN_AND_MEAN 1
+    #include <Windows.h>
+    #include <GL/GL.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != wglGetCurrentContext())
+    
+    #undef BENCH_GL_FUNCTION_TYPE
+    #define BENCH_GL_FUNCTION_TYPE __stdcall
+
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            wglGetProcAddress("gl" #F);
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            wglGetProcAddress("gl" #F #S);
+    
+#elif defined(SK_BUILD_FOR_MAC)
+    #include <OpenGL/gl.h>
+    #include <OpenGL/CGLCurrent.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != CGLGetCurrentContext())
+    
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include <GL/gl.h>
+    #include <GL/glx.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != glXGetCurrentContext())
+    
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F));
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F #S));
+#else
+    #error unsupported platform
+#endif
+
+#define BenchGL_TIME_ELAPSED 0x88BF
+#define BenchGL_QUERY_RESULT 0x8866
+#define BenchGL_QUERY_RESULT_AVAILABLE 0x8867
+
+#if defined(SK_BUILD_FOR_WIN32)
+typedef UINT64 BenchGLuint64;
+#else
+#include <stdint.h>
+typedef uint64_t BenchGLuint64;
+#endif
+
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGenQueriesProc) (GLsizei n, GLuint *ids);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLBeginQueryProc) (GLenum target, GLuint id);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLEndQueryProc) (GLenum target);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLDeleteQueriesProc) (GLsizei n, const GLuint *ids);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectivProc) (GLuint id, GLenum pname, GLint *params);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectui64vProc) (GLuint id, GLenum pname, BenchGLuint64 *params);
+
+struct BenchGLInterface {  // GL entry points used by BenchGpuTimer
+    bool fHasTimer;  // set when a timer-query extension is advertised
+    BenchGLGenQueriesProc fGenQueries;
+    BenchGLBeginQueryProc fBeginQuery;
+    BenchGLEndQueryProc fEndQuery;
+    BenchGLDeleteQueriesProc fDeleteQueries;
+    BenchGLGetQueryObjectivProc fGetQueryObjectiv;  // polled for result availability
+    BenchGLGetQueryObjectui64vProc fGetQueryObjectui64v;  // reads the 64-bit result
+};
+
+// True when `ext` is one whole space-separated token of `extensionString`.
+static bool BenchGLCheckExtension(const char* ext,
+                                  const char* extensionString) {
+    if (NULL == extensionString) return false;  // glGetString() can fail
+    const size_t extLength = strlen(ext);
+    while (true) {
+        const size_t n = strcspn(extensionString, " ");
+        if (n == extLength && 0 == strncmp(ext, extensionString, n)) {
+            return true;
+        }
+        if (0 == extensionString[n]) {
+            return false;
+        }
+        extensionString += n+1;
+    }
+    return false;
+}
+
+static BenchGLInterface gBenchGL;
+static bool gBenchGLInterfaceInit = false;
+
+// One-time lookup of the GL timer-query entry points for the current context.
+static void BenchGLSetDefaultGLInterface() {
+    // Return before touching fHasTimer so repeat calls keep the first result.
+    if (gBenchGLInterfaceInit || !SK_BENCH_CONTEXT_CHECK) return;
+    gBenchGL.fHasTimer = false;
+
+    const char* glExts =
+        reinterpret_cast<const char*>(glGetString(GL_EXTENSIONS));
+    if (NULL == glExts) glExts = "";  // glGetString failed: no extensions
+    const GLboolean ext = BenchGLCheckExtension("GL_EXT_timer_query", glExts);
+    const GLboolean arb = BenchGLCheckExtension("GL_ARB_timer_query", glExts);
+    if (ext || arb) {
+#if defined(SK_BUILD_FOR_MAC)
+        #if GL_EXT_timer_query || GL_ARB_timer_query
+        gBenchGL.fHasTimer = true;
+        gBenchGL.fGenQueries = glGenQueries;
+        gBenchGL.fBeginQuery = glBeginQuery;
+        gBenchGL.fEndQuery = glEndQuery;
+        gBenchGL.fDeleteQueries = glDeleteQueries;
+        gBenchGL.fGetQueryObjectiv = glGetQueryObjectiv;
+        #endif
+        #if GL_ARB_timer_query
+        gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64v;
+        #elif GL_EXT_timer_query
+        gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64vEXT;
+        #endif
+#else
+        gBenchGL.fHasTimer = true;
+        SK_GL_GET_PROC(GenQueries)
+        SK_GL_GET_PROC(BeginQuery)
+        SK_GL_GET_PROC(EndQuery)
+        SK_GL_GET_PROC(DeleteQueries)
+        SK_GL_GET_PROC(GetQueryObjectiv)
+        if (arb) {
+            SK_GL_GET_PROC(GetQueryObjectui64v)
+        } else {
+            SK_GL_GET_PROC_SUFFIX(GetQueryObjectui64v, EXT)
+        }
+#endif
+    }
+    gBenchGLInterfaceInit = true;
+}
+
+// Give both members defined values even when no query can be generated,
+// so the destructor never hands an indeterminate id to the GL.
+BenchGpuTimer::BenchGpuTimer() : fQuery(0), fStarted(false) {
+    BenchGLSetDefaultGLInterface();
+    if (gBenchGL.fHasTimer) gBenchGL.fGenQueries(1, &this->fQuery);
+}
+
+BenchGpuTimer::~BenchGpuTimer() {
+    // Without timer support there is no query object to release.
+    if (!gBenchGL.fHasTimer) return;
+    gBenchGL.fDeleteQueries(1, &this->fQuery);
+}
+
+void BenchGpuTimer::startGpu() {  // does nothing without timer support
+    if (gBenchGL.fHasTimer) {
+        this->fStarted = true;
+        gBenchGL.fBeginQuery(BenchGL_TIME_ELAPSED, this->fQuery);
+    }
+}
+
+/**
+ * Closes the elapsed-time query and returns its result divided by 1e6.
+ * Stop the cpu clocks before calling: this spins on the cpu until the
+ * gpu result is available. Returns 0 when timer queries are unsupported.
+ */
+double BenchGpuTimer::endGpu() {
+    if (!gBenchGL.fHasTimer) {
+        return 0;
+    }
+    this->fStarted = false;
+    gBenchGL.fEndQuery(BenchGL_TIME_ELAPSED);
+    // Poll until the driver reports that the result can be read.
+    GLint ready = 0;
+    do {
+        gBenchGL.fGetQueryObjectiv(this->fQuery,
+                                   BenchGL_QUERY_RESULT_AVAILABLE, &ready);
+    } while (!ready);
+
+    BenchGLuint64 elapsed = 0;
+    gBenchGL.fGetQueryObjectui64v(this->fQuery, BenchGL_QUERY_RESULT,
+                                  &elapsed);
+    return elapsed / 1000000.0;
+}
diff --git a/bench/BenchGpuTimer_gl.h b/bench/BenchGpuTimer_gl.h
new file mode 100644
index 0000000..ac23482
--- /dev/null
+++ b/bench/BenchGpuTimer_gl.h
@@ -0,0 +1,33 @@
+#ifndef SkBenchGpuTimer_DEFINED
+#define SkBenchGpuTimer_DEFINED
+
+#if defined(SK_MESA)
+    #include <GL/osmesa.h>
+
+#elif defined(SK_BUILD_FOR_WIN32)
+    #define WIN32_LEAN_AND_MEAN 1
+    #include <Windows.h>
+    #include <GL/GL.h>
+    
+#elif defined(SK_BUILD_FOR_MAC)
+    #include <OpenGL/gl.h>
+    
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include <GL/gl.h>
+
+#else
+    #error unsupported platform
+#endif
+
+class BenchGpuTimer {  // times GPU work between startGpu() and endGpu()
+public:
+    BenchGpuTimer();
+    ~BenchGpuTimer();
+    void startGpu();
+    double endGpu();  // returns 0 when timer queries are unavailable
+private:
+    GLuint fQuery;  // query object id passed to the GL query entry points
+    int fStarted;  // set by startGpu(), cleared by endGpu()
+};
+
+#endif
diff --git a/bench/BenchGpuTimer_none.cpp b/bench/BenchGpuTimer_none.cpp
new file mode 100644
index 0000000..4ead4ac
--- /dev/null
+++ b/bench/BenchGpuTimer_none.cpp
@@ -0,0 +1,13 @@
+#include "BenchGpuTimer_none.h"
+
+// Stub GPU timer for builds that have no GL timer queries.
+BenchGpuTimer::BenchGpuTimer() {}
+
+BenchGpuTimer::~BenchGpuTimer() {}
+
+void BenchGpuTimer::startGpu() {}
+
+// No GPU timing is available, so always report zero.
+double BenchGpuTimer::endGpu() {
+    return 0;
+}
diff --git a/bench/BenchGpuTimer_none.h b/bench/BenchGpuTimer_none.h
new file mode 100644
index 0000000..7069ca4
--- /dev/null
+++ b/bench/BenchGpuTimer_none.h
@@ -0,0 +1,12 @@
+#ifndef SkBenchGpuTimer_DEFINED
+#define SkBenchGpuTimer_DEFINED
+
+class BenchGpuTimer {  // same public interface as the GL-backed timer
+public:
+    BenchGpuTimer();
+    ~BenchGpuTimer();
+    void startGpu();
+    double endGpu();  // BenchTimer::end() stores this value in fGpu
+};
+
+#endif
diff --git a/bench/BenchSysTimer_c.cpp b/bench/BenchSysTimer_c.cpp
new file mode 100644
index 0000000..fc0850b
--- /dev/null
+++ b/bench/BenchSysTimer_c.cpp
@@ -0,0 +1,20 @@
+#include "BenchSysTimer_c.h"
+
+//Time
+#include <time.h>
+
+void BenchSysTimer::startWall() {
+    this->fStartWall = time(NULL);
+}
+void BenchSysTimer::startCpu() {
+    this->fStartCpu = clock();
+}
+
+// clock() ticks elapsed, converted to milliseconds.
+double BenchSysTimer::endCpu() {
+    return (clock() - this->fStartCpu) * 1000.0 / CLOCKS_PER_SEC;
+}
+// difftime() yields seconds; scale to milliseconds.
+double BenchSysTimer::endWall() {
+    return difftime(time(NULL), this->fStartWall) * 1000.0;
+}
diff --git a/bench/BenchSysTimer_c.h b/bench/BenchSysTimer_c.h
new file mode 100644
index 0000000..c598f30
--- /dev/null
+++ b/bench/BenchSysTimer_c.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <time.h>
+#warning standard clocks
+
+class BenchSysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    clock_t fStartCpu;  // named as BenchSysTimer_c.cpp refers to it
+    time_t fStartWall;
+};
+
+#endif
diff --git a/bench/BenchSysTimer_mach.cpp b/bench/BenchSysTimer_mach.cpp
new file mode 100644
index 0000000..b23897c
--- /dev/null
+++ b/bench/BenchSysTimer_mach.cpp
@@ -0,0 +1,69 @@
+#include "BenchSysTimer_mach.h"
+
+//Time
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+static time_value_t macCpuTime() {
+    mach_port_t task = mach_task_self();
+    if (task == MACH_PORT_NULL) {
+        time_value_t none = {0, 0};
+        return none;
+    }
+    // Ask the kernel for the user/system times of this task's threads.
+    task_thread_times_info thread_info_data;
+    mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
+    if (KERN_SUCCESS != task_info(task,
+                 TASK_THREAD_TIMES_INFO,
+                 reinterpret_cast<task_info_t>(&thread_info_data),
+                 &thread_info_count))
+    {
+        time_value_t none = {0, 0};
+        return none;
+    }
+    // Terminate the call explicitly instead of relying on the macro's braces.
+    time_value_add(&thread_info_data.user_time, &thread_info_data.system_time);
+    return thread_info_data.user_time;
+}
+
+// Elapsed milliseconds between two time_value_t samples.
+static double intervalInMSec(const time_value_t start_clock
+                           , const time_value_t end_clock)
+{
+    if ((end_clock.microseconds - start_clock.microseconds) < 0) {
+        // Borrow one second so the microsecond difference is positive.
+        double ms = (end_clock.seconds - start_clock.seconds-1)*1000;
+        ms += (1000000
+               + end_clock.microseconds
+               - start_clock.microseconds) / 1000.0;
+        return ms;
+    }
+    double ms = (end_clock.seconds - start_clock.seconds)*1000;
+    ms += (end_clock.microseconds - start_clock.microseconds) / 1000.0;
+    return ms;
+}
+
+// Records the current mach_absolute_time() value as the wall start.
+void BenchSysTimer::startWall() { this->fStartWall = mach_absolute_time(); }
+
+// Records the current macCpuTime() value as the cpu start.
+void BenchSysTimer::startCpu() { this->fStartCpu = macCpuTime(); }
+
+// Cpu milliseconds consumed since startCpu().
+double BenchSysTimer::endCpu() {
+    const time_value_t now = macCpuTime();
+    return intervalInMSec(this->fStartCpu, now);
+}
+double BenchSysTimer::endWall() {
+    uint64_t end_wall = mach_absolute_time();
+    uint64_t elapsed = end_wall - this->fStartWall;
+    // Scale mach ticks to nanoseconds with the timebase ratio.
+    mach_timebase_info_data_t sTimebaseInfo;
+    if (KERN_SUCCESS != mach_timebase_info(&sTimebaseInfo)) {
+        return 0;
+    }
+    uint64_t elapsedNano = elapsed * sTimebaseInfo.numer
+                           / sTimebaseInfo.denom;
+    // Divide in floating point so the sub-millisecond part is kept.
+    return elapsedNano / 1000000.0;
+}
diff --git a/bench/BenchSysTimer_mach.h b/bench/BenchSysTimer_mach.h
new file mode 100644
index 0000000..da4fff0
--- /dev/null
+++ b/bench/BenchSysTimer_mach.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+class BenchSysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    time_value_t fStartCpu;  // cpu time sampled by startCpu()
+    uint64_t fStartWall;  // mach_absolute_time() value sampled by startWall()
+};
+
+#endif
diff --git a/bench/BenchSysTimer_posix.cpp b/bench/BenchSysTimer_posix.cpp
new file mode 100644
index 0000000..5d28f40
--- /dev/null
+++ b/bench/BenchSysTimer_posix.cpp
@@ -0,0 +1,50 @@
+#include "BenchSysTimer_posix.h"
+
+//Time
+#include <time.h>
+
+// Elapsed milliseconds between two timespec samples.
+static double intervalInMSec(const timespec start_clock
+                           , const timespec end_clock)
+{
+    if ((end_clock.tv_nsec - start_clock.tv_nsec) < 0) {
+        // Borrow one second so the nanosecond difference is positive.
+        double ms = (end_clock.tv_sec - start_clock.tv_sec-1)*1000;
+        ms += (1000000000 + end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0;
+        return ms;
+    }
+    double ms = (end_clock.tv_sec - start_clock.tv_sec)*1000;
+    ms += (end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0;
+    return ms;
+}
+
+// Reads clock `id` with clock_gettime(); yields {0, 0} when the call
+// reports failure (-1).
+static timespec readClock(clockid_t id) {
+    timespec now;
+    if (-1 == clock_gettime(id, &now)) {
+        timespec none = {0, 0};
+        now = none;
+    }
+    return now;
+}
+
+// Samples the monotonic clock as the wall-time start.
+void BenchSysTimer::startWall() {
+    this->fWall = readClock(CLOCK_MONOTONIC);
+}
+
+// Samples the process cpu-time clock as the cpu start.
+void BenchSysTimer::startCpu() {
+    this->fCpu = readClock(CLOCK_PROCESS_CPUTIME_ID);
+}
+
+// Milliseconds of process cpu time since startCpu().
+double BenchSysTimer::endCpu() {
+    return intervalInMSec(this->fCpu, readClock(CLOCK_PROCESS_CPUTIME_ID));
+}
+
+// Milliseconds on the monotonic clock since startWall().
+double BenchSysTimer::endWall() {
+    return intervalInMSec(this->fWall, readClock(CLOCK_MONOTONIC));
+}
diff --git a/bench/BenchSysTimer_posix.h b/bench/BenchSysTimer_posix.h
new file mode 100644
index 0000000..09dfb0e
--- /dev/null
+++ b/bench/BenchSysTimer_posix.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <time.h>
+
+class BenchSysTimer {
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    timespec fCpu;  // CLOCK_PROCESS_CPUTIME_ID sample taken by startCpu()
+    timespec fWall;  // CLOCK_MONOTONIC sample taken by startWall()
+};
+
+#endif
+
diff --git a/bench/BenchSysTimer_windows.cpp b/bench/BenchSysTimer_windows.cpp
new file mode 100644
index 0000000..923754c
--- /dev/null
+++ b/bench/BenchSysTimer_windows.cpp
@@ -0,0 +1,55 @@
+#include "BenchSysTimer_windows.h"
+
+//Time
+#define WIN32_LEAN_AND_MEAN 1
+#include <Windows.h>
+
+// Sum of the process's kernel and user FILETIME counts; 0 on failure.
+static ULONGLONG winCpuTime() {
+    FILETIME createTime;
+    FILETIME exitTime;
+    FILETIME sysTime;
+    FILETIME usrTime;
+    if (0 == GetProcessTimes(GetCurrentProcess(), &createTime, &exitTime,
+                             &sysTime, &usrTime)) {
+        return 0;
+    }
+    // Reassemble each FILETIME's two 32-bit halves into one 64-bit count.
+    ULARGE_INTEGER kernelTicks;
+    kernelTicks.LowPart  = sysTime.dwLowDateTime;
+    kernelTicks.HighPart = sysTime.dwHighDateTime;
+    ULARGE_INTEGER userTicks;
+    userTicks.LowPart  = usrTime.dwLowDateTime;
+    userTicks.HighPart = usrTime.dwHighDateTime;
+    return kernelTicks.QuadPart + userTicks.QuadPart;
+}
+
+// Samples the performance counter as the wall start (0 if the query fails).
+void BenchSysTimer::startWall() {
+    if (0 == ::QueryPerformanceCounter(&this->fStartWall)) {
+        this->fStartWall.QuadPart = 0;
+    }
+}
+// Samples winCpuTime() as the cpu start.
+void BenchSysTimer::startCpu() { this->fStartCpu = winCpuTime(); }
+
+double BenchSysTimer::endCpu() {
+    // 100 ns FILETIME ticks to ms; divide in floating point to keep fractions.
+    return (winCpuTime() - this->fStartCpu) / 10000.0;
+}
+// Wall milliseconds since startWall(), measured with the performance counter.
+double BenchSysTimer::endWall() {
+    LARGE_INTEGER end_wall;
+    if (0 == ::QueryPerformanceCounter(&end_wall)) {
+        end_wall.QuadPart = 0;
+    }
+
+    LARGE_INTEGER frequency;
+    if (0 == ::QueryPerformanceFrequency(&frequency)) {
+        return 0;
+    }
+
+    // counter ticks / (ticks per second) * 1000 = milliseconds
+    const LONGLONG ticks_elapsed = end_wall.QuadPart - this->fStartWall.QuadPart;
+    return (double)ticks_elapsed / frequency.QuadPart * 1000;
+}
diff --git a/bench/BenchSysTimer_windows.h b/bench/BenchSysTimer_windows.h
new file mode 100644
index 0000000..72a3fb2
--- /dev/null
+++ b/bench/BenchSysTimer_windows.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#define WIN32_LEAN_AND_MEAN 1
+#include <Windows.h>
+
+class BenchSysTimer {  // class-key matches the forward declaration in BenchTimer.h
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    ULONGLONG fStartCpu;  // winCpuTime() value sampled by startCpu()
+    LARGE_INTEGER fStartWall;  // performance-counter value sampled by startWall()
+};
+
+#endif
diff --git a/bench/BenchTimer.cpp b/bench/BenchTimer.cpp
new file mode 100644
index 0000000..e7b0068
--- /dev/null
+++ b/bench/BenchTimer.cpp
@@ -0,0 +1,48 @@
+#include "BenchTimer.h"
+#if defined(SK_BUILD_FOR_WIN32)
+    #include "BenchSysTimer_windows.h"
+#elif defined(SK_BUILD_FOR_MAC)
+    #include "BenchSysTimer_mach.h"
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include "BenchSysTimer_posix.h"
+#else
+    #include "BenchSysTimer_c.h"
+#endif
+
+#if defined(SK_MESA) || \
+    defined(SK_BUILD_FOR_WIN32) || \
+    defined(SK_BUILD_FOR_MAC) || \
+    defined(SK_BUILD_FOR_UNIX)
+    #include "BenchGpuTimer_gl.h"
+
+#else
+    #include "BenchGpuTimer_none.h"
+#endif
+
+BenchTimer::BenchTimer()
+        : fCpu(-1.0)  // each result stays -1 until end() stores a measurement
+        , fWall(-1.0)
+        , fGpu(-1.0)
+{
+    this->fSysTimer = new BenchSysTimer();
+    this->fGpuTimer = new BenchGpuTimer();
+}
+
+BenchTimer::~BenchTimer() {
+    delete this->fSysTimer;  // both timers were allocated by the constructor
+    delete this->fGpuTimer;
+}
+
+void BenchTimer::start() {
+    this->fSysTimer->startWall();  // wall clock is started first and stopped last
+    this->fGpuTimer->startGpu();
+    this->fSysTimer->startCpu();   // cpu clock is started last and stopped first
+}
+
+void BenchTimer::end() {
+    this->fCpu = this->fSysTimer->endCpu();
+    //It is important to stop the cpu clocks first,
+    //as the following will cpu wait for the gpu to finish.
+    this->fGpu = this->fGpuTimer->endGpu();
+    this->fWall = this->fSysTimer->endWall();  // read after the gpu wait above
+}
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
new file mode 100644
index 0000000..eae82d5
--- /dev/null
+++ b/bench/BenchTimer.h
@@ -0,0 +1,27 @@
+#ifndef SkBenchTimer_DEFINED
+#define SkBenchTimer_DEFINED
+
+class BenchSysTimer;
+class BenchGpuTimer;
+
+/**
+ * SysTimers and GpuTimers are implemented orthogonally.
+ * This class combines a SysTimer and a GpuTimer into one single,
+ * platform specific, Timer with a simple interface.
+ */
+class BenchTimer {
+public:
+    BenchTimer();
+    ~BenchTimer();
+    void start();
+    void end();
+    double fCpu;   // written by end() from the system timer's endCpu()
+    double fWall;  // written by end() from the system timer's endWall()
+    double fGpu;   // written by end() from the gpu timer's endGpu()
+    
+private:
+    BenchSysTimer *fSysTimer;  // allocated in the constructor, deleted in the destructor
+    BenchGpuTimer *fGpuTimer;  // allocated in the constructor, deleted in the destructor
+};
+
+#endif
diff --git a/bench/benchmain.cpp b/bench/benchmain.cpp
index 066573a..34f8a1a 100644
--- a/bench/benchmain.cpp
+++ b/bench/benchmain.cpp
@@ -5,12 +5,12 @@
 #include "SkNWayCanvas.h"
 #include "SkPicture.h"
 #include "SkString.h"
-#include "SkTime.h"
 #include "GrContext.h"
 #include "SkGpuDevice.h"
 #include "SkEGLContext.h"
 
 #include "SkBenchmark.h"
+#include "BenchTimer.h"
 
 #ifdef ANDROID
 static void log_error(const char msg[]) { SkDebugf("%s", msg); }
@@ -212,6 +212,9 @@
     bool forceAA = true;
     bool forceFilter = false;
     SkTriState::State forceDither = SkTriState::kDefault;
+    bool timerWall = false;
+    bool timerCpu = true;
+    bool timerGpu = true;
     bool doScale = false;
     bool doRotate = false;
     bool doClip = false;
@@ -246,6 +249,23 @@
                 log_error("missing arg for -repeat\n");
                 return -1;
             }
+        } else if (strcmp(*argv, "-timers") == 0) {
+            argv++;
+            if (argv < stop) {
+                timerWall = false;
+                timerCpu = false;
+                timerGpu = false;
+                for (char* t = *argv; *t; ++t) {
+                    switch (*t) {
+                    case 'w': timerWall = true; break;
+                    case 'c': timerCpu = true; break;
+                    case 'g': timerGpu = true; break;
+                    }
+                }
+            } else {
+                log_error("missing arg for -timers\n");
+                return -1;
+            }
         } else if (!strcmp(*argv, "-rotate")) {
             doRotate = true;
         } else if (!strcmp(*argv, "-scale")) {
@@ -346,6 +366,8 @@
         context = GrContext::CreateGLShaderContext();
     }
     
+    BenchTimer timer;  // direct-init: BenchTimer owns raw pointers, never copy it
+    
     Iter iter(&defineDict);
     SkBenchmark* bench;
     while ((bench = iter.next()) != NULL) {
@@ -399,30 +421,36 @@
                 performRotate(&canvas, dim.fX, dim.fY);
             }
             
+            bool gpu = kGPU_Backend == backend && context;
             //warm up caches if needed
             if (repeatDraw > 1) {
                 SkAutoCanvasRestore acr(&canvas, true);
                 bench->draw(&canvas);
-                if (kGPU_Backend == backend && context) {
+                if (gpu) {
                     context->flush();
                     glFinish();
                 }
             }
             
-            SkMSec now = SkTime::GetMSecs();
+            timer.start();
             for (int i = 0; i < repeatDraw; i++) {
                 SkAutoCanvasRestore acr(&canvas, true);
                 bench->draw(&canvas);
             }
-            if (kGPU_Backend == backend && context) {
-                context->flush();
-                glFinish();
-            }
+            timer.end();
             
             if (repeatDraw > 1) {
-                double duration = SkTime::GetMSecs() - now;
                 SkString str;
-                str.printf("  %4s: msecs = %5.2f", configName, duration / repeatDraw);
+                str.printf("  %4s:", configName);
+                if (timerWall) {
+                    str.appendf(" msecs = %6.2f", timer.fWall / repeatDraw);
+                }
+                if (timerCpu) {
+                    str.appendf(" cmsecs = %6.2f", timer.fCpu / repeatDraw);
+                }
+                if (timerGpu && gpu && timer.fGpu > 0) {
+                    str.appendf(" gmsecs = %6.2f", timer.fGpu / repeatDraw);
+                }
                 log_progress(str);
             }
             if (outDir.size() > 0) {
diff --git a/gyp/bench.gyp b/gyp/bench.gyp
index 6054c23..01aea2e 100644
--- a/gyp/bench.gyp
+++ b/gyp/bench.gyp
@@ -17,6 +17,18 @@
       'type': 'executable',
       'sources': [
         '../bench/benchmain.cpp',
+        '../bench/BenchTimer.h',
+        '../bench/BenchTimer.cpp',
+        '../bench/BenchSysTimer_mach.h',
+        '../bench/BenchSysTimer_mach.cpp',
+        '../bench/BenchSysTimer_posix.h',
+        '../bench/BenchSysTimer_posix.cpp',
+        '../bench/BenchSysTimer_windows.h',
+        '../bench/BenchSysTimer_windows.cpp',
+        '../bench/BenchGpuTimer_gl.h',
+        '../bench/BenchGpuTimer_gl.cpp',
+        '../bench/BenchGpuTimer_none.h',
+        '../bench/BenchGpuTimer_none.cpp',
         
         '../bench/SkBenchmark.h',
         '../bench/SkBenchmark.cpp',
@@ -38,6 +50,46 @@
         'images.gyp:images',
         'utils.gyp:utils',
       ],
+      'conditions': [
+        [ 'OS != "mac"', {
+          'sources!': [
+            '../bench/BenchSysTimer_mach.h',
+            '../bench/BenchSysTimer_mach.cpp',
+          ],
+        }],
+        [ 'OS not in ["linux", "freebsd", "openbsd", "solaris"]', {
+          'sources!': [
+            '../bench/BenchSysTimer_posix.h',
+            '../bench/BenchSysTimer_posix.cpp',
+          ],
+        }],
+        [ 'OS in ["linux", "freebsd", "openbsd", "solaris"]', {
+          'link_settings': {
+            'libraries': [
+              '-lrt',
+            ],
+          },
+        }],
+        [ 'OS != "win"', {
+          'sources!': [
+            '../bench/BenchSysTimer_windows.h',
+            '../bench/BenchSysTimer_windows.cpp',
+          ],
+        }],
+
+        [ 'OS in ["win", "mac", "linux", "freebsd", "openbsd", "solaris"]', {
+          'sources!': [
+            '../bench/BenchGpuTimer_none.h',
+            '../bench/BenchGpuTimer_none.cpp',
+          ],
+        }],
+        [ 'OS not in ["win", "mac", "linux", "freebsd", "openbsd", "solaris"]', {
+          'sources!': [
+            '../bench/BenchGpuTimer_gl.h',
+            '../bench/BenchGpuTimer_gl.cpp',
+          ],
+        }],
+      ],
     },
   ],
 }