nanobench on Windows: try compiler barriers around the timer instead of disabling LTCG.

BUG=skia:

Review URL: https://codereview.chromium.org/391193003
diff --git a/gyp/bench.gyp b/gyp/bench.gyp
index 86fb1df..6618403 100644
--- a/gyp/bench.gyp
+++ b/gyp/bench.gyp
@@ -48,9 +48,6 @@
         'tools.gyp:crash_handler',
         'tools.gyp:timer',
       ],
-
-      # Experiment to see if LTCG is the key difference leading to Windows infinite loops.
-      'msvs_settings': { 'VCLinkerTool': { 'LinkTimeCodeGeneration': '0' } },
     },
   ],
 }
diff --git a/tools/timer/SysTimer_windows.cpp b/tools/timer/SysTimer_windows.cpp
index 2f9d0a5..d24a6aa 100644
--- a/tools/timer/SysTimer_windows.cpp
+++ b/tools/timer/SysTimer_windows.cpp
@@ -6,6 +6,8 @@
  */
 #include "SysTimer_windows.h"
 
+#include <intrin.h>
+
 static ULONGLONG win_cpu_time() {
     FILETIME createTime;
     FILETIME exitTime;
@@ -23,11 +25,6 @@
     return start_cpu_sys.QuadPart + start_cpu_usr.QuadPart;
 }
 
-void SysTimer::startWall() {
-    if (0 == ::QueryPerformanceCounter(&fStartWall)) {
-        fStartWall.QuadPart = 0;
-    }
-}
 void SysTimer::startCpu() {
     fStartCpu = win_cpu_time();
 }
@@ -36,11 +33,22 @@
     ULONGLONG end_cpu = win_cpu_time();
     return static_cast<double>(end_cpu - fStartCpu) / 10000.0L;
 }
+
+static void wall_timestamp(LARGE_INTEGER* now) {  // Store the current QueryPerformanceCounter reading in *now.
+    _ReadWriteBarrier();  // Compiler barrier ahead of the counter read (see commit subject: barriers around the timer).
+    if (0 == ::QueryPerformanceCounter(now)) {
+        now->QuadPart = 0;  // The call returned 0 (failure): fall back to a zero timestamp.
+    }
+    _ReadWriteBarrier();  // Matching compiler barrier after the counter read.
+}
+
+void SysTimer::startWall() {  // Capture the wall-clock start timestamp into fStartWall.
+    wall_timestamp(&fStartWall);
+}
+
 double SysTimer::endWall() {
     LARGE_INTEGER end_wall;
-    if (0 == ::QueryPerformanceCounter(&end_wall)) {
-        end_wall.QuadPart = 0;
-    }
+    wall_timestamp(&end_wall);
 
     LARGE_INTEGER ticks_elapsed;
     ticks_elapsed.QuadPart = end_wall.QuadPart - fStartWall.QuadPart;