More natural way to serialize GPU tasks and tests.

This removes the Windows-only #ifdef hacks for running GPU tests and replaces
them with cross-platform behavior controlled by --gpu_threading:
    - When --gpu_threading is false (the default), GPU tasks and GPU tests are
      put together in the same GPU enclave, so they all run serially on one
      thread.
    - When --gpu_threading is true, both the GPU tests and the GPU tasks run
      totally independently, just like the thread-safe CPU-bound work.

BUG=skia:3255

Review URL: https://codereview.chromium.org/847273005
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 169a0ee..6fe2d62 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -74,6 +74,7 @@
 DEFINE_bool(bbh, true, "Build a BBH for SKPs?");
 DEFINE_bool(mpd, true, "Use MultiPictureDraw for the SKPs?");
 DEFINE_int32(flushEvery, 10, "Flush --outResultsFile every Nth run.");
+DEFINE_bool(resetGpuContext, true, "Reset the GrContext before running each test.");
 
 static SkString humanize(double ms) {
     if (FLAGS_verbose) return SkStringPrintf("%llu", (uint64_t)(ms*1e6));
diff --git a/dm/DM.cpp b/dm/DM.cpp
index e90a3f5..d4991ef 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -353,7 +353,7 @@
 
 // Unit tests don't fit so well into the Src/Sink model, so we give them special treatment.
 
-static SkTDArray<skiatest::Test> gCPUTests, gGPUTests;
+static SkTDArray<skiatest::Test> gThreadedTests, gGPUTests;
 
 static void gather_tests() {
     if (!FLAGS_src.contains("tests")) {
@@ -368,9 +368,9 @@
             continue;
         }
         if (test.needsGpu && gpu_supported()) {
-            gGPUTests.push(test);
+            (FLAGS_gpu_threading ? gThreadedTests : gGPUTests).push(test);
         } else if (!test.needsGpu && FLAGS_cpu) {
-            gCPUTests.push(test);
+            gThreadedTests.push(test);
         }
     }
 }
@@ -389,7 +389,8 @@
     WallTimer timer;
     timer.start();
     if (!FLAGS_dryRun) {
-        test->proc(&reporter, GetThreadLocalGrContextFactory());
+        GrContextFactory factory;
+        test->proc(&reporter, &factory);
     }
     timer.end();
     done(timer.fWall, "unit", "test", test->name);
@@ -397,6 +398,14 @@
 
 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
 
+// If we're isolating all GPU-bound work to one thread (the default), this function runs all that.
+static void run_enclave_and_gpu_tests(SkTArray<Task>* tasks) {
+    run_enclave(tasks);
+    for (int i = 0; i < gGPUTests.count(); i++) {
+        run_test(&gGPUTests[i]);
+    }
+}
+
 int dm_main();
 int dm_main() {
     SetupCrashHandler();
@@ -407,9 +416,9 @@
     gather_sinks();
     gather_tests();
 
-    gPending = gSrcs.count() * gSinks.count() + gCPUTests.count() + gGPUTests.count();
+    gPending = gSrcs.count() * gSinks.count() + gThreadedTests.count() + gGPUTests.count();
     SkDebugf("%d srcs * %d sinks + %d tests == %d tasks\n",
-             gSrcs.count(), gSinks.count(), gCPUTests.count() + gGPUTests.count(), gPending);
+             gSrcs.count(), gSinks.count(), gThreadedTests.count() + gGPUTests.count(), gPending);
 
     // We try to exploit as much parallelism as is safe.  Most Src/Sink pairs run on any thread,
     // but Sinks that identify as part of a particular enclave run serially on a single thread.
@@ -422,31 +431,24 @@
         }
     }
 
-    SK_COMPILE_ASSERT(kAnyThread_Enclave == 0, AnyThreadZero);
     SkTaskGroup tg;
-        tg.batch(  Task::Run, enclaves[0].begin(), enclaves[0].count());
-        tg.batch(run_enclave,          enclaves+1,      kNumEnclaves-1);
-        tg.batch(   run_test,   gCPUTests.begin(),   gCPUTests.count());
-        if (FLAGS_gpu_threading) {
-            tg.batch(run_test,  gGPUTests.begin(),   gGPUTests.count());
-        #if !defined(SK_BUILD_FOR_WIN32)
-        } else {
-            for (int i = 0; i < gGPUTests.count(); i++) {
-                run_test(&gGPUTests[i]);
-            }
-        #endif
+    tg.batch(run_test, gThreadedTests.begin(), gThreadedTests.count());
+    for (int i = 0; i < kNumEnclaves; i++) {
+        switch(i) {
+            case kAnyThread_Enclave:
+                tg.batch(Task::Run, enclaves[i].begin(), enclaves[i].count());
+                break;
+            case kGPU_Enclave:
+                tg.add(run_enclave_and_gpu_tests, &enclaves[i]);
+                break;
+            default:
+                tg.add(run_enclave, &enclaves[i]);
+                break;
         }
+    }
     tg.wait();
     // At this point we're back in single-threaded land.
 
-    // This is not ideal for parallelism, but Windows seems crash-prone if we run
-    // these GPU tests in parallel with any GPU Src/Sink work.  Everyone else seems fine.
-#if defined(SK_BUILD_FOR_WIN32)
-    for (int i = 0; i < gGPUTests.count(); i++) {
-        run_test(&gGPUTests[i]);
-    }
-#endif
-
     SkDebugf("\n");
     JsonWriter::DumpJson();
 
diff --git a/dm/DMGpuSupport.cpp b/dm/DMGpuSupport.cpp
deleted file mode 100644
index 87a406b..0000000
--- a/dm/DMGpuSupport.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "DMGpuSupport.h"
-#include "SkTLS.h"
-
-static void* create_gr_factory()        { return new GrContextFactory; }
-static void  delete_gr_factory(void* p) { delete (GrContextFactory*)p; }
-
-GrContextFactory* GetThreadLocalGrContextFactory() {
-    return (GrContextFactory*)SkTLS::Get(create_gr_factory, delete_gr_factory);
-}
diff --git a/dm/DMGpuSupport.h b/dm/DMGpuSupport.h
index 335da08..032151f 100644
--- a/dm/DMGpuSupport.h
+++ b/dm/DMGpuSupport.h
@@ -75,6 +75,4 @@
 
 #endif//SK_SUPPORT_GPU
 
-GrContextFactory* GetThreadLocalGrContextFactory();
-
 #endif//DMGpuSupport_DEFINED
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index 2eb327e..affb844 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -141,22 +141,16 @@
     , fThreaded(threaded) {}
 
 int GPUSink::enclave() const {
-    return fThreaded ? kAnyThread_Enclave : kGPUSink_Enclave;
+    return fThreaded ? kAnyThread_Enclave : kGPU_Enclave;
 }
 
 Error GPUSink::draw(const Src& src, SkBitmap* dst, SkWStream*) const {
-    GrContextFactory* factory = GetThreadLocalGrContextFactory();
-    if (FLAGS_abandonGpuContext) {
-        factory->abandonContexts();
-    }
-    if (FLAGS_resetGpuContext || FLAGS_abandonGpuContext) {
-        factory->destroyContexts();
-    }
+    GrContextFactory factory;
     const SkISize size = src.size();
     const SkImageInfo info =
         SkImageInfo::Make(size.width(), size.height(), kN32_SkColorType, kPremul_SkAlphaType);
     SkAutoTUnref<SkSurface> surface(
-            NewGpuSurface(factory, fContextType, fGpuAPI, info, fSampleCount, fUseDFText));
+            NewGpuSurface(&factory, fContextType, fGpuAPI, info, fSampleCount, fUseDFText));
     if (!surface) {
         return "Could not create a surface.";
     }
@@ -168,6 +162,9 @@
     canvas->flush();
     dst->allocPixels(info);
     canvas->readPixels(dst, 0,0);
+    if (FLAGS_abandonGpuContext) {
+        factory.abandonContexts();
+    }
     return "";
 }
 
diff --git a/dm/DMSrcSink.h b/dm/DMSrcSink.h
index 31c3130..0683d81 100644
--- a/dm/DMSrcSink.h
+++ b/dm/DMSrcSink.h
@@ -41,7 +41,7 @@
     virtual const char* fileExtension() const  = 0;
 };
 
-enum { kAnyThread_Enclave, kGPUSink_Enclave, kPDFSink_Enclave };
+enum { kAnyThread_Enclave, kGPU_Enclave, kPDFSink_Enclave };
 static const int kNumEnclaves = kPDFSink_Enclave + 1;
 
 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
diff --git a/gyp/dm.gypi b/gyp/dm.gypi
index 94726a2..4bfda6f 100644
--- a/gyp/dm.gypi
+++ b/gyp/dm.gypi
@@ -30,7 +30,6 @@
   ],
   'sources': [
     '../dm/DM.cpp',
-    '../dm/DMGpuSupport.cpp',
     '../dm/DMSrcSink.cpp',
     '../dm/DMJsonWriter.cpp',
     '../gm/gm.cpp',
diff --git a/tools/flags/SkCommonFlags.cpp b/tools/flags/SkCommonFlags.cpp
index a9a0bae..2a0490c 100644
--- a/tools/flags/SkCommonFlags.cpp
+++ b/tools/flags/SkCommonFlags.cpp
@@ -37,9 +37,7 @@
 
 DEFINE_bool2(quiet, q, false, "if true, don't print status updates.");
 
-DEFINE_bool(resetGpuContext, true, "Reset the GrContext before running each test.");
-DEFINE_bool(abandonGpuContext, false, "Abandon the GrContext after running each test. "
-                                      "Implies --resetGpuContext.");
+DEFINE_bool(abandonGpuContext, false, "Abandon the GrContext after running each test.");
 
 DEFINE_string(skps, "skps", "Directory to read skps from.");