Implement SkGLContext swapBuffers with fence syncs

Improves the GPU measuring accuracy of nanobench by using fence syncs.
Fence syncs are very widely supported and available on almost every
platform.

NO_MERGE_BUILDS
BUG=skia:

Review URL: https://codereview.chromium.org/1194783003
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 65d9216..13ae9f6 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -79,7 +79,7 @@
 DEFINE_double(overheadGoal, 0.0001,
               "Loop until timer overhead is at most this fraction of our measurments.");
 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");
-DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag.");
+DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag.");
 DEFINE_bool(gpuCompressAlphaMasks, false, "Compress masks generated from falling back to "
                                           "software path rendering.");
 
@@ -144,7 +144,13 @@
         SK_GL(*this->gl, Finish());
     }
 
-    bool needsFrameTiming() const override { return true; }
+    bool needsFrameTiming(int* maxFrameLag) const override {
+        if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) {
+            // Frame lag is unknown.
+            *maxFrameLag = FLAGS_gpuFrameLag;
+        }
+        return true;
+    }
     bool init(SkImageInfo info, Benchmark* bench) override {
         uint32_t flags = this->config.useDFText ? SkSurfaceProps::kUseDistanceFieldFonts_Flag : 0;
         SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
@@ -155,6 +161,10 @@
         if (!this->surface.get()) {
             return false;
         }
+        if (!this->gl->fenceSyncSupport()) {
+            SkDebugf("WARNING: GL context for config \"%s\" does not support fence sync. "
+                     "Timings might not be accurate.\n", this->config.name);
+        }
         return true;
     }
     void fillOptions(ResultsWriter* log) override {
@@ -307,7 +317,8 @@
 
 static int gpu_bench(Target* target,
                      Benchmark* bench,
-                     double* samples) {
+                     double* samples,
+                     int maxGpuFrameLag) {
     // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
     int loops = FLAGS_loops;
     if (kAutoTuneLoops == loops) {
@@ -321,9 +332,8 @@
             }
             loops *= 2;
             // If the GPU lets frames lag at all, we need to make sure we're timing
-            // _this_ round, not still timing last round.  We force this by looping
-            // more times than any reasonable GPU will allow frames to lag.
-            for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+            // _this_ round, not still timing last round.
+            for (int i = 0; i < maxGpuFrameLag; i++) {
                 elapsed = time(loops, bench, target);
             }
         } while (elapsed < FLAGS_gpuMs);
@@ -340,7 +350,7 @@
 
     // Pretty much the same deal as the calibration: do some warmup to make
     // sure we're timing steady-state pipelined frames.
-    for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+    for (int i = 0; i < maxGpuFrameLag - 1; i++) {
         time(loops, bench, target);
     }
 
@@ -429,6 +439,9 @@
 #ifdef SK_ANGLE
         GPU_CONFIG(angle, kANGLE_GLContextType, 0, false)
 #endif
+#if SK_MESA
+        GPU_CONFIG(mesa, kMESA_GLContextType, 0, false)
+#endif
     }
 #endif
 
@@ -1008,9 +1021,10 @@
             targets[j]->setup();
             bench->perCanvasPreDraw(canvas);
 
+            int frameLag;
             const int loops =
-                targets[j]->needsFrameTiming()
-                ? gpu_bench(targets[j], bench.get(), samples.get())
+                targets[j]->needsFrameTiming(&frameLag)
+                ? gpu_bench(targets[j], bench.get(), samples.get(), frameLag)
                 : cpu_bench(overhead, targets[j], bench.get(), samples.get());
 
             bench->perCanvasPostDraw(canvas);
diff --git a/bench/nanobench.h b/bench/nanobench.h
index 1dc0b8b..f556f7d 100644
--- a/bench/nanobench.h
+++ b/bench/nanobench.h
@@ -63,7 +63,7 @@
     /** CPU-like targets can just be timed, but GPU-like
         targets need to pay attention to frame boundaries
         or other similar details. */
-    virtual bool needsFrameTiming() const { return false; }
+    virtual bool needsFrameTiming(int* frameLag) const { return false; }
 
     /** Called once per target, during program initialization.
         Returns false if initialization fails. */
diff --git a/bench/nanobenchAndroid.cpp b/bench/nanobenchAndroid.cpp
index 3d5cda4..50673c9 100644
--- a/bench/nanobenchAndroid.cpp
+++ b/bench/nanobenchAndroid.cpp
@@ -42,7 +42,9 @@
     this->renderer.proxy->fence();
 }
 
-bool HWUITarget::needsFrameTiming() const {
+bool HWUITarget::needsFrameTiming(int* frameLag) const {
+    extern int FLAGS_gpuFrameLag;
+    *frameLag = FLAGS_gpuFrameLag;
     return true;
 }
 
diff --git a/bench/nanobenchAndroid.h b/bench/nanobenchAndroid.h
index 7f6ff44..16a81cf 100644
--- a/bench/nanobenchAndroid.h
+++ b/bench/nanobenchAndroid.h
@@ -23,7 +23,7 @@
     SkCanvas* beginTiming(SkCanvas* canvas) override;
     void endTiming() override;
     void fence() override;
-    bool needsFrameTiming() const override;
+    bool needsFrameTiming(int* frameLag) const override;
 
     bool init(SkImageInfo info, Benchmark* bench) override;
     bool capturePixels(SkBitmap* bmp) override;
diff --git a/include/gpu/gl/GrGLInterface.h b/include/gpu/gl/GrGLInterface.h
index e510551..8c72526 100644
--- a/include/gpu/gl/GrGLInterface.h
+++ b/include/gpu/gl/GrGLInterface.h
@@ -30,6 +30,8 @@
  * comments in GrGLConfig.h
  */
 
+typedef void(*GrGLFuncPtr)();
+
 struct GrGLInterface;
 
 const GrGLInterface* GrGLDefaultInterface();
diff --git a/include/gpu/gl/SkGLContext.h b/include/gpu/gl/SkGLContext.h
index 6ca7bf5..8209c31 100644
--- a/include/gpu/gl/SkGLContext.h
+++ b/include/gpu/gl/SkGLContext.h
@@ -9,6 +9,7 @@
 #define SkGLContext_DEFINED
 
 #include "GrGLInterface.h"
+#include "../../src/gpu/SkGpuFenceSync.h"
 
 /**
  * Create an offscreen opengl context with an RGBA8 / 8bit stencil FBO.
@@ -25,19 +26,32 @@
 
     const GrGLInterface* gl() const { return fGL.get(); }
 
-    virtual void makeCurrent() const = 0;
+    bool fenceSyncSupport() const { return SkToBool(fFenceSync); }
+
+    bool getMaxGpuFrameLag(int* maxFrameLag) const {
+        if (!fFenceSync) {
+            return false;
+        }
+        *maxFrameLag = kMaxFrameLag;
+        return true;
+    }
+
+    void makeCurrent() const;
 
     /**
-     * The primary purpose of this function it to provide a means of scheduling
+     * The only purpose of this function is to provide a means of scheduling
      * work on the GPU (since all of the subclasses create primary buffers for
      * testing that are small and not meant to be rendered to the screen).
      *
-     * If the drawing surface provided by the platform is double buffered this
-     * call will cause the platform to swap which buffer is currently being
-     * targeted.  If the current surface does not include a back buffer, this
-     * call has no effect.
+     * If the platform supports fence sync (OpenGL 3.2+ or EGL_KHR_fence_sync),
+     * this will not swap any buffers, but rather emulate triple buffer
+     * synchronization using fences.
+     *
+     * Otherwise it will call the platform SwapBuffers method. This may or may
+     * not perform some sort of synchronization, depending on whether the
+     * drawing surface provided by the platform is double buffered.
      */
-    virtual void swapBuffers() const = 0;
+    void swapBuffers();
 
     /**
      * This notifies the context that we are deliberately testing abandoning
@@ -47,13 +61,37 @@
      */
     void testAbandon();
 
+    class GLFenceSync;  // SkGpuFenceSync implementation that uses the OpenGL functionality.
+
 protected:
     SkGLContext();
 
+    /*
+     * Methods that subclasses must call from their constructors and destructors.
+     */
+    void init(const GrGLInterface*, SkGpuFenceSync* = NULL);
+    void teardown();
+
+    /*
+     * Operations that have a platform-dependent implementation.
+     */
+    virtual void onPlatformMakeCurrent() const = 0;
+    virtual void onPlatformSwapBuffers() const = 0;
+    virtual GrGLFuncPtr onPlatformGetProcAddress(const char*) const = 0;
+
+private:
+    enum { kMaxFrameLag = 3 };
+
+    SkAutoTDelete<SkGpuFenceSync> fFenceSync;
+    SkPlatformGpuFence            fFrameFences[kMaxFrameLag - 1];
+    int                           fCurrentFenceIdx;
+
     /** Subclass provides the gl interface object if construction was
      *  successful. */
     SkAutoTUnref<const GrGLInterface> fGL;
 
+    friend class GLFenceSync;  // For onPlatformGetProcAddress.
+
     typedef SkRefCnt INHERITED;
 };
 
diff --git a/include/gpu/gl/SkNullGLContext.h b/include/gpu/gl/SkNullGLContext.h
index ca71dde..1f63438 100644
--- a/include/gpu/gl/SkNullGLContext.h
+++ b/include/gpu/gl/SkNullGLContext.h
@@ -13,8 +13,6 @@
 class SK_API SkNullGLContext : public SkGLContext {
 public:
     ~SkNullGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override {};
 
     static SkNullGLContext* Create(GrGLStandard);
 
@@ -23,6 +21,10 @@
 private:
     SkNullGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override {}
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override { return NULL; }
+
     ContextState* fState;
 };
 
diff --git a/include/gpu/gl/angle/SkANGLEGLContext.h b/include/gpu/gl/angle/SkANGLEGLContext.h
index f54f29e..8850cd0 100644
--- a/include/gpu/gl/angle/SkANGLEGLContext.h
+++ b/include/gpu/gl/angle/SkANGLEGLContext.h
@@ -15,8 +15,6 @@
 class SkANGLEGLContext : public SkGLContext {
 public:
     ~SkANGLEGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
     static SkANGLEGLContext* Create(GrGLStandard forcedGpuAPI) {
         if (kGL_GrGLStandard == forcedGpuAPI) {
@@ -37,6 +35,10 @@
     SkANGLEGLContext();
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char* name) const override;
+
     void* fContext;
     void* fDisplay;
     void* fSurface;
diff --git a/src/gpu/SkGpuFenceSync.h b/src/gpu/SkGpuFenceSync.h
new file mode 100644
index 0000000..b78398f
--- /dev/null
+++ b/src/gpu/SkGpuFenceSync.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SkGpuFenceSync_DEFINED
+#define SkGpuFenceSync_DEFINED
+
+#include "SkTypes.h"
+
+typedef void* SkPlatformGpuFence;
+
+/*
+ * This class provides an interface to interact with fence syncs. A fence sync is an object that the
+ * client can insert into the GPU command stream, and then at any future time, wait until all
+ * commands that were issued before the fence have completed.
+ */
+class SkGpuFenceSync {
+public:
+    virtual SkPlatformGpuFence SK_WARN_UNUSED_RESULT insertFence() const = 0;
+    virtual bool flushAndWaitFence(SkPlatformGpuFence) const = 0;
+    virtual void deleteFence(SkPlatformGpuFence) const = 0;
+
+    virtual ~SkGpuFenceSync() {}
+};
+
+#endif
diff --git a/src/gpu/gl/GrGLAssembleInterface.h b/src/gpu/gl/GrGLAssembleInterface.h
index aa0fbca..2a4835b 100644
--- a/src/gpu/gl/GrGLAssembleInterface.h
+++ b/src/gpu/gl/GrGLAssembleInterface.h
@@ -8,7 +8,6 @@
 
 #include "gl/GrGLInterface.h"
 
-typedef void(*GrGLFuncPtr)();
 typedef GrGLFuncPtr (*GrGLGetProc)(void* ctx, const char name[]);
 
 
diff --git a/src/gpu/gl/SkGLContext.cpp b/src/gpu/gl/SkGLContext.cpp
index 7cc728d..8975a98 100644
--- a/src/gpu/gl/SkGLContext.cpp
+++ b/src/gpu/gl/SkGLContext.cpp
@@ -7,16 +7,148 @@
  */
 #include "gl/SkGLContext.h"
 #include "GrGLUtil.h"
+#include "SkGpuFenceSync.h"
 
-SkGLContext::SkGLContext() {
+class SkGLContext::GLFenceSync : public SkGpuFenceSync {
+public:
+    static GLFenceSync* CreateIfSupported(const SkGLContext*);
+
+    SkPlatformGpuFence SK_WARN_UNUSED_RESULT insertFence() const override;
+    bool flushAndWaitFence(SkPlatformGpuFence fence) const override;
+    void deleteFence(SkPlatformGpuFence fence) const override;
+
+private:
+    GLFenceSync() {}
+
+    static const GrGLenum GL_SYNC_GPU_COMMANDS_COMPLETE  = 0x9117;
+    static const GrGLenum GL_WAIT_FAILED                 = 0x911d;
+    static const GrGLbitfield GL_SYNC_FLUSH_COMMANDS_BIT = 0x00000001;
+
+    typedef struct __GLsync *GLsync;
+
+    typedef GLsync (GR_GL_FUNCTION_TYPE* GLFenceSyncProc) (GrGLenum, GrGLbitfield);
+    typedef GrGLenum (GR_GL_FUNCTION_TYPE* GLClientWaitSyncProc) (GLsync, GrGLbitfield, GrGLuint64);
+    typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GLDeleteSyncProc) (GLsync);
+
+    GLFenceSyncProc        fGLFenceSync;
+    GLClientWaitSyncProc   fGLClientWaitSync;
+    GLDeleteSyncProc       fGLDeleteSync;
+
+    typedef SkGpuFenceSync INHERITED;
+};
+
+SkGLContext::SkGLContext()
+    : fCurrentFenceIdx(0) {
+    memset(fFrameFences, 0, sizeof(fFrameFences));
 }
 
 SkGLContext::~SkGLContext() {
-    SkASSERT(NULL == fGL.get());  // Subclass should destroy the interface.
+    // Subclass should call teardown.
+#ifdef SK_DEBUG
+    for (size_t i = 0; i < SK_ARRAY_COUNT(fFrameFences); i++) {
+        SkASSERT(0 == fFrameFences[i]);
+    }
+#endif
+    SkASSERT(NULL == fGL.get());
+    SkASSERT(NULL == fFenceSync.get());
+}
+
+void SkGLContext::init(const GrGLInterface* gl, SkGpuFenceSync* fenceSync) {
+    SkASSERT(!fGL.get());
+    fGL.reset(gl);
+    fFenceSync.reset(fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this));
+}
+
+void SkGLContext::teardown() {
+    if (fFenceSync) {
+        for (size_t i = 0; i < SK_ARRAY_COUNT(fFrameFences); i++) {
+            if (fFrameFences[i]) {
+                fFenceSync->deleteFence(fFrameFences[i]);
+                fFrameFences[i] = 0;
+            }
+        }
+        fFenceSync.reset(NULL);
+    }
+
+    fGL.reset(NULL);
+}
+
+void SkGLContext::makeCurrent() const {
+    this->onPlatformMakeCurrent();
+}
+
+void SkGLContext::swapBuffers() {
+    if (!fFenceSync) {
+        // No fence sync: fall back on the platform SwapBuffers method. This may have no effect.
+        this->onPlatformSwapBuffers();
+        return;
+    }
+    // The current slot holds the oldest pending fence: wait for it, then free it for reuse.
+    if (fFrameFences[fCurrentFenceIdx]) {
+        if (!fFenceSync->flushAndWaitFence(fFrameFences[fCurrentFenceIdx])) {
+            SkDebugf("WARNING: Wait failed for fence sync. Timings might not be accurate.\n");
+        }
+        fFenceSync->deleteFence(fFrameFences[fCurrentFenceIdx]);
+    }
+    // Insert a fence after the commands issued so far and advance to the next slot (wrapping).
+    fFrameFences[fCurrentFenceIdx] = fFenceSync->insertFence();
+    fCurrentFenceIdx = (fCurrentFenceIdx + 1) % SK_ARRAY_COUNT(fFrameFences);
 }
 
 void SkGLContext::testAbandon() {
     if (fGL) {
         fGL->abandon();
     }
+    if (fFenceSync) {
+        memset(fFrameFences, 0, sizeof(fFrameFences));
+    }
+}
+
+SkGLContext::GLFenceSync* SkGLContext::GLFenceSync::CreateIfSupported(const SkGLContext* ctx) {
+    SkAutoTDelete<GLFenceSync> ret(SkNEW(GLFenceSync));
+    // Returns NULL unless sync is available: GL 3.2+ or GL_ARB_sync; GL_APPLE_sync otherwise.
+    if (kGL_GrGLStandard == ctx->gl()->fStandard) {
+        const GrGLubyte* versionStr;
+        SK_GL_RET(*ctx, versionStr, GetString(GR_GL_VERSION));
+        GrGLVersion version = GrGLGetVersionFromString(reinterpret_cast<const char*>(versionStr));
+        if (version < GR_GL_VER(3,2) && !ctx->gl()->hasExtension("GL_ARB_sync")) {
+            return NULL;
+        }
+        ret->fGLFenceSync = reinterpret_cast<GLFenceSyncProc>(
+            ctx->onPlatformGetProcAddress("glFenceSync"));
+        ret->fGLClientWaitSync = reinterpret_cast<GLClientWaitSyncProc>(
+            ctx->onPlatformGetProcAddress("glClientWaitSync"));
+        ret->fGLDeleteSync = reinterpret_cast<GLDeleteSyncProc>(
+            ctx->onPlatformGetProcAddress("glDeleteSync"));
+    } else {
+        if (!ctx->gl()->hasExtension("GL_APPLE_sync")) {
+            return NULL;
+        }
+        ret->fGLFenceSync = reinterpret_cast<GLFenceSyncProc>(
+            ctx->onPlatformGetProcAddress("glFenceSyncAPPLE"));
+        ret->fGLClientWaitSync = reinterpret_cast<GLClientWaitSyncProc>(
+            ctx->onPlatformGetProcAddress("glClientWaitSyncAPPLE"));
+        ret->fGLDeleteSync = reinterpret_cast<GLDeleteSyncProc>(
+            ctx->onPlatformGetProcAddress("glDeleteSyncAPPLE"));
+    }
+    // All three entry points must resolve; otherwise fence sync is reported as unsupported.
+    if (!ret->fGLFenceSync || !ret->fGLClientWaitSync || !ret->fGLDeleteSync) {
+        return NULL;
+    }
+    // Success: release the object from the auto-delete holder and return it to the caller.
+    return ret.detach();
+}
+
+SkPlatformGpuFence SkGLContext::GLFenceSync::insertFence() const {
+    return fGLFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+}
+
+bool SkGLContext::GLFenceSync::flushAndWaitFence(SkPlatformGpuFence fence) const {
+    GLsync glsync = static_cast<GLsync>(fence);
+    return GL_WAIT_FAILED != fGLClientWaitSync(glsync, GL_SYNC_FLUSH_COMMANDS_BIT, -1);
+}
+
+void SkGLContext::GLFenceSync::deleteFence(SkPlatformGpuFence fence) const {
+    GLsync glsync = static_cast<GLsync>(fence);
+    fGLDeleteSync(glsync);
 }
diff --git a/src/gpu/gl/SkNullGLContext.cpp b/src/gpu/gl/SkNullGLContext.cpp
index 7ced85e..1d67578 100644
--- a/src/gpu/gl/SkNullGLContext.cpp
+++ b/src/gpu/gl/SkNullGLContext.cpp
@@ -547,7 +547,7 @@
 SkNullGLContext::SkNullGLContext() {
     fState = SkNEW(ContextState);
     GrGLInterface* interface = create_null_interface(fState);
-    fGL.reset(interface);
+    this->init(interface);
 #if GR_GL_PER_GL_FUNC_CALLBACK
     interface->fCallback = set_current_context_from_interface;
     interface->fCallbackData = reinterpret_cast<GrGLInterfaceCallbackData>(fState);
@@ -555,8 +555,8 @@
 }
 
 SkNullGLContext::~SkNullGLContext() {
-    fGL.reset(NULL);
+    this->teardown();
     fState->unref();
 }
 
-void SkNullGLContext::makeCurrent() const { set_current_context(fState); }
+void SkNullGLContext::onPlatformMakeCurrent() const { set_current_context(fState); }
diff --git a/src/gpu/gl/angle/SkANGLEGLContext.cpp b/src/gpu/gl/angle/SkANGLEGLContext.cpp
index cea2adf..f632198 100644
--- a/src/gpu/gl/angle/SkANGLEGLContext.cpp
+++ b/src/gpu/gl/angle/SkANGLEGLContext.cpp
@@ -96,25 +96,27 @@
 
     eglMakeCurrent(fDisplay, fSurface, fSurface, fContext);
 
-    fGL.reset(GrGLCreateANGLEInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateANGLEInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Could not create ANGLE GL interface!\n");
         this->destroyGLContext();
         return;
     }
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Could not validate ANGLE GL interface!\n");
         this->destroyGLContext();
         return;
     }
+
+    this->init(gl.detach());
 }
 
 SkANGLEGLContext::~SkANGLEGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void SkANGLEGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fDisplay) {
         eglMakeCurrent(fDisplay, 0, 0, 0);
 
@@ -133,14 +135,18 @@
     }
 }
 
-void SkANGLEGLContext::makeCurrent() const {
+void SkANGLEGLContext::onPlatformMakeCurrent() const {
     if (!eglMakeCurrent(fDisplay, fSurface, fSurface, fContext)) {
         SkDebugf("Could not set the context.\n");
     }
 }
 
-void SkANGLEGLContext::swapBuffers() const {
+void SkANGLEGLContext::onPlatformSwapBuffers() const {
     if (!eglSwapBuffers(fDisplay, fSurface)) {
         SkDebugf("Could not complete eglSwapBuffers.\n");
     }
 }
+
+GrGLFuncPtr SkANGLEGLContext::onPlatformGetProcAddress(const char* name) const {
+    return eglGetProcAddress(name);
+}
diff --git a/src/gpu/gl/debug/SkDebugGLContext.cpp b/src/gpu/gl/debug/SkDebugGLContext.cpp
index ae55104..531e6c3 100644
--- a/src/gpu/gl/debug/SkDebugGLContext.cpp
+++ b/src/gpu/gl/debug/SkDebugGLContext.cpp
@@ -9,9 +9,9 @@
 #include "gl/debug/SkDebugGLContext.h"
 
 SkDebugGLContext::SkDebugGLContext() {
-    fGL.reset(GrGLCreateDebugInterface());
+    this->init(GrGLCreateDebugInterface());
 }
 
 SkDebugGLContext::~SkDebugGLContext() {
-    fGL.reset(NULL);
+    this->teardown();
 }
diff --git a/src/gpu/gl/debug/SkDebugGLContext.h b/src/gpu/gl/debug/SkDebugGLContext.h
index 5779532..6a4d9fc 100644
--- a/src/gpu/gl/debug/SkDebugGLContext.h
+++ b/src/gpu/gl/debug/SkDebugGLContext.h
@@ -13,8 +13,6 @@
 class SkDebugGLContext : public SkGLContext {
 public:
     ~SkDebugGLContext() override;
-    void makeCurrent() const override {}
-    void swapBuffers() const override {}
 
     static SkDebugGLContext* Create(GrGLStandard forcedGpuAPI) {
         if (kGLES_GrGLStandard == forcedGpuAPI) {
@@ -23,6 +21,10 @@
         return SkNEW(SkDebugGLContext);
     }
 private:
+    void onPlatformMakeCurrent() const override {}
+    void onPlatformSwapBuffers() const override {}
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override { return NULL; }
+
     SkDebugGLContext();
 };
 
diff --git a/src/gpu/gl/egl/SkCreatePlatformGLContext_egl.cpp b/src/gpu/gl/egl/SkCreatePlatformGLContext_egl.cpp
index d57f761..9ed57a3 100644
--- a/src/gpu/gl/egl/SkCreatePlatformGLContext_egl.cpp
+++ b/src/gpu/gl/egl/SkCreatePlatformGLContext_egl.cpp
@@ -8,20 +8,42 @@
 #include "gl/SkGLContext.h"
 
 #include <GLES2/gl2.h>
+
+#define EGL_EGLEXT_PROTOTYPES
 #include <EGL/egl.h>
+#include <EGL/eglext.h>
 
 namespace {
 
+// TODO: Share this class with ANGLE if/when it gets support for EGL_KHR_fence_sync.
+class SkEGLFenceSync : public SkGpuFenceSync {
+public:
+    static SkEGLFenceSync* CreateIfSupported(EGLDisplay);
+
+    SkPlatformGpuFence SK_WARN_UNUSED_RESULT insertFence() const override;
+    bool flushAndWaitFence(SkPlatformGpuFence fence) const override;
+    void deleteFence(SkPlatformGpuFence fence) const override;
+
+private:
+    SkEGLFenceSync(EGLDisplay display) : fDisplay(display) {}
+
+    EGLDisplay                    fDisplay;
+
+    typedef SkGpuFenceSync INHERITED;
+};
+
 class EGLGLContext : public SkGLContext  {
 public:
     EGLGLContext(GrGLStandard forcedGpuAPI);
     ~EGLGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
 private:
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override;
+
     EGLContext fContext;
     EGLDisplay fDisplay;
     EGLSurface fSurface;
@@ -69,7 +91,9 @@
     }
     SkASSERT(forcedGpuAPI == kNone_GrGLStandard || kAPIs[api].fStandard == forcedGpuAPI);
 
-    for (; NULL == fGL.get() && api < apiLimit; ++api) {
+    SkAutoTUnref<const GrGLInterface> gl;
+
+    for (; NULL == gl.get() && api < apiLimit; ++api) {
         fDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY);
 
         EGLint majorVersion;
@@ -134,27 +158,30 @@
             continue;
         }
 
-        fGL.reset(GrGLCreateNativeInterface());
-        if (NULL == fGL.get()) {
+        gl.reset(GrGLCreateNativeInterface());
+        if (NULL == gl.get()) {
             SkDebugf("Failed to create gl interface.\n");
             this->destroyGLContext();
             continue;
         }
 
-        if (!fGL->validate()) {
+        if (!gl->validate()) {
             SkDebugf("Failed to validate gl interface.\n");
             this->destroyGLContext();
             continue;
         }
+
+        this->init(gl.detach(), SkEGLFenceSync::CreateIfSupported(fDisplay));
+        break;
     }
 }
 
 EGLGLContext::~EGLGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void EGLGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fDisplay) {
         eglMakeCurrent(fDisplay, 0, 0, 0);
 
@@ -174,18 +201,61 @@
 }
 
 
-void EGLGLContext::makeCurrent() const {
+void EGLGLContext::onPlatformMakeCurrent() const {
     if (!eglMakeCurrent(fDisplay, fSurface, fSurface, fContext)) {
         SkDebugf("Could not set the context.\n");
     }
 }
 
-void EGLGLContext::swapBuffers() const {
+void EGLGLContext::onPlatformSwapBuffers() const {
     if (!eglSwapBuffers(fDisplay, fSurface)) {
         SkDebugf("Could not complete eglSwapBuffers.\n");
     }
 }
 
+GrGLFuncPtr EGLGLContext::onPlatformGetProcAddress(const char* procName) const {
+    return eglGetProcAddress(procName);
+}
+
+static bool supports_egl_extension(EGLDisplay display, const char* extension) {
+    // Whole-token search of EGL_EXTENSIONS; a NULL query result or empty name means "no".
+    const size_t extensionLength = strlen(extension);
+    const char* const extensionsStr = eglQueryString(display, EGL_EXTENSIONS);
+    const char* match = (extensionsStr && extensionLength) ? extensionsStr : NULL;
+    while (match && (match = strstr(match, extension))) {
+        if ((match == extensionsStr || match[-1] == ' ') &&  // Not a suffix of a longer name,
+            (match[extensionLength] == ' ' || match[extensionLength] == '\0')) {  // nor a prefix.
+            return true;
+        }
+        match += extensionLength;  // Keep searching; extensionsStr stays the true string start.
+    }
+    return false;
+}
+
+SkEGLFenceSync* SkEGLFenceSync::CreateIfSupported(EGLDisplay display) {
+    if (!display || !supports_egl_extension(display, "EGL_KHR_fence_sync")) {
+        return NULL;
+    }
+    return SkNEW_ARGS(SkEGLFenceSync, (display));
+}
+
+SkPlatformGpuFence SkEGLFenceSync::insertFence() const {
+    return eglCreateSyncKHR(fDisplay, EGL_SYNC_FENCE_KHR, NULL);
+}
+
+bool SkEGLFenceSync::flushAndWaitFence(SkPlatformGpuFence platformFence) const {
+    EGLSyncKHR eglsync = static_cast<EGLSyncKHR>(platformFence);
+    return EGL_CONDITION_SATISFIED_KHR == eglClientWaitSyncKHR(fDisplay,
+                                                               eglsync,
+                                                               EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
+                                                               EGL_FOREVER_KHR);
+}
+
+void SkEGLFenceSync::deleteFence(SkPlatformGpuFence platformFence) const {
+    EGLSyncKHR eglsync = static_cast<EGLSyncKHR>(platformFence);
+    eglDestroySyncKHR(fDisplay, eglsync);
+}
+
 } // anonymous namespace
 
 SkGLContext* SkCreatePlatformGLContext(GrGLStandard forcedGpuAPI) {
diff --git a/src/gpu/gl/glx/SkCreatePlatformGLContext_glx.cpp b/src/gpu/gl/glx/SkCreatePlatformGLContext_glx.cpp
index 8006d49..7933757 100644
--- a/src/gpu/gl/glx/SkCreatePlatformGLContext_glx.cpp
+++ b/src/gpu/gl/glx/SkCreatePlatformGLContext_glx.cpp
@@ -48,12 +48,14 @@
 public:
     GLXGLContext(GrGLStandard forcedGpuAPI);
     ~GLXGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
 private:
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override;
+
     GLXContext fContext;
     Display* fDisplay;
     Pixmap fPixmap;
@@ -267,27 +269,29 @@
         return;
     }
 
-    fGL.reset(GrGLCreateNativeInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateNativeInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Failed to create gl interface");
         this->destroyGLContext();
         return;
     }
 
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Failed to validate gl interface");
         this->destroyGLContext();
         return;
     }
+
+    this->init(gl.detach());
 }
 
 
 GLXGLContext::~GLXGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void GLXGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fDisplay) {
         glXMakeCurrent(fDisplay, 0, 0);
 
@@ -311,16 +315,20 @@
     }
 }
 
-void GLXGLContext::makeCurrent() const {
+void GLXGLContext::onPlatformMakeCurrent() const {
     if (!glXMakeCurrent(fDisplay, fGlxPixmap, fContext)) {
         SkDebugf("Could not set the context.\n");
     }
 }
 
-void GLXGLContext::swapBuffers() const {
+void GLXGLContext::onPlatformSwapBuffers() const {
     glXSwapBuffers(fDisplay, fGlxPixmap);
 }
 
+GrGLFuncPtr GLXGLContext::onPlatformGetProcAddress(const char* procName) const {
+    return glXGetProcAddress(reinterpret_cast<const GLubyte*>(procName));
+}
+
 } // anonymous namespace
 
 SkGLContext* SkCreatePlatformGLContext(GrGLStandard forcedGpuAPI) {
diff --git a/src/gpu/gl/iOS/SkCreatePlatformGLContext_iOS.mm b/src/gpu/gl/iOS/SkCreatePlatformGLContext_iOS.mm
index 8842168..08e6f23 100644
--- a/src/gpu/gl/iOS/SkCreatePlatformGLContext_iOS.mm
+++ b/src/gpu/gl/iOS/SkCreatePlatformGLContext_iOS.mm
@@ -8,6 +8,7 @@
 
 #include "gl/SkGLContext.h"
 #import <OpenGLES/EAGL.h>
+#include <dlfcn.h>
 
 #define EAGLCTX ((EAGLContext*)(fEAGLContext))
 
@@ -17,40 +18,50 @@
 public:
     IOSGLContext();
     ~IOSGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
 private:
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override;
+
     void* fEAGLContext;
+    void* fGLLibrary;
 };
 
 IOSGLContext::IOSGLContext()
-    : fEAGLContext(NULL) {
+    : fEAGLContext(NULL)
+    , fGLLibrary(RTLD_DEFAULT) {
 
     fEAGLContext = [[EAGLContext alloc] initWithAPI:kEAGLRenderingAPIOpenGLES2];
     [EAGLContext setCurrentContext:EAGLCTX];
 
-    fGL.reset(GrGLCreateNativeInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateNativeInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Failed to create gl interface");
         this->destroyGLContext();
         return;
     }
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Failed to validate gl interface");
         this->destroyGLContext();
         return;
     }
+
+    fGLLibrary = dlopen(
+        "/System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGL.dylib",
+        RTLD_LAZY);
+
+    this->init(gl.detach());
 }
 
 IOSGLContext::~IOSGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void IOSGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fEAGLContext) {
         if ([EAGLContext currentContext] == EAGLCTX) {
             [EAGLContext setCurrentContext:nil];
@@ -58,16 +69,23 @@
         [EAGLCTX release];
         fEAGLContext = NULL;
     }
+    if (RTLD_DEFAULT != fGLLibrary) {
+        dlclose(fGLLibrary);
+    }
 }
 
 
-void IOSGLContext::makeCurrent() const {
+void IOSGLContext::onPlatformMakeCurrent() const {
     if (![EAGLContext setCurrentContext:EAGLCTX]) {
         SkDebugf("Could not set the context.\n");
     }
 }
 
-void IOSGLContext::swapBuffers() const { }
+void IOSGLContext::onPlatformSwapBuffers() const { }
+
+GrGLFuncPtr IOSGLContext::onPlatformGetProcAddress(const char* procName) const {
+    return reinterpret_cast<GrGLFuncPtr>(dlsym(fGLLibrary, procName));
+}
 
 } // anonymous namespace
 
diff --git a/src/gpu/gl/mac/SkCreatePlatformGLContext_mac.cpp b/src/gpu/gl/mac/SkCreatePlatformGLContext_mac.cpp
index 436c53f..d2d8569 100644
--- a/src/gpu/gl/mac/SkCreatePlatformGLContext_mac.cpp
+++ b/src/gpu/gl/mac/SkCreatePlatformGLContext_mac.cpp
@@ -9,23 +9,28 @@
 #include "AvailabilityMacros.h"
 
 #include <OpenGL/OpenGL.h>
+#include <dlfcn.h>
 
 namespace {
 class MacGLContext : public SkGLContext {
 public:
     MacGLContext();
     ~MacGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
 private:
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override;
+
     CGLContextObj fContext;
+    void* fGLLibrary;
 };
 
 MacGLContext::MacGLContext()
-    : fContext(NULL) {
+    : fContext(NULL)
+    , fGLLibrary(RTLD_DEFAULT) {
     CGLPixelFormatAttribute attributes[] = {
 #if MAC_OS_X_VERSION_10_7
         kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute) kCGLOGLPVersion_3_2_Core,
@@ -53,39 +58,52 @@
 
     CGLSetCurrentContext(fContext);
 
-    fGL.reset(GrGLCreateNativeInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateNativeInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Context could not create GL interface.\n");
         this->destroyGLContext();
         return;
     }
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Context could not validate GL interface.\n");
         this->destroyGLContext();
         return;
     }
+
+    fGLLibrary = dlopen(
+        "/System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGL.dylib",
+        RTLD_LAZY);
+
+    this->init(gl.detach());
 }
 
 MacGLContext::~MacGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void MacGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fContext) {
         CGLReleaseContext(fContext);
         fContext = NULL;
     }
+    if (RTLD_DEFAULT != fGLLibrary) {
+        dlclose(fGLLibrary);
+    }
 }
 
-void MacGLContext::makeCurrent() const {
+void MacGLContext::onPlatformMakeCurrent() const {
     CGLSetCurrentContext(fContext);
 }
 
-void MacGLContext::swapBuffers() const {
+void MacGLContext::onPlatformSwapBuffers() const {
     CGLFlushDrawable(fContext);
 }
 
+GrGLFuncPtr MacGLContext::onPlatformGetProcAddress(const char* procName) const {
+    return reinterpret_cast<GrGLFuncPtr>(dlsym(fGLLibrary, procName));
+}
+
 } // anonymous namespace
 
 SkGLContext* SkCreatePlatformGLContext(GrGLStandard forcedGpuAPI) {
diff --git a/src/gpu/gl/mesa/SkMesaGLContext.cpp b/src/gpu/gl/mesa/SkMesaGLContext.cpp
index 1fac5fa..701cc0d 100644
--- a/src/gpu/gl/mesa/SkMesaGLContext.cpp
+++ b/src/gpu/gl/mesa/SkMesaGLContext.cpp
@@ -50,26 +50,28 @@
         return;
     }
 
-    fGL.reset(GrGLCreateMesaInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateMesaInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Could not create GL interface!\n");
         this->destroyGLContext();
         return;
     }
 
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Could not validate GL interface!\n");
         this->destroyGLContext();
         return;
     }
+
+    this->init(gl.detach());
 }
 
 SkMesaGLContext::~SkMesaGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void SkMesaGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     if (fImage) {
         sk_free(fImage);
         fImage = NULL;
@@ -83,7 +85,7 @@
 
 
 
-void SkMesaGLContext::makeCurrent() const {
+void SkMesaGLContext::onPlatformMakeCurrent() const {
     if (fContext) {
         if (!OSMesaMakeCurrent((OSMesaContext)fContext, fImage,
                                GR_GL_UNSIGNED_BYTE, gBOGUS_SIZE, gBOGUS_SIZE)) {
@@ -92,4 +94,8 @@
     }
 }
 
-void SkMesaGLContext::swapBuffers() const { }
+void SkMesaGLContext::onPlatformSwapBuffers() const { }
+
+GrGLFuncPtr SkMesaGLContext::onPlatformGetProcAddress(const char* procName) const {
+    return OSMesaGetProcAddress(procName);
+}
diff --git a/src/gpu/gl/mesa/SkMesaGLContext.h b/src/gpu/gl/mesa/SkMesaGLContext.h
index bf0c7e9..fa3df7b 100644
--- a/src/gpu/gl/mesa/SkMesaGLContext.h
+++ b/src/gpu/gl/mesa/SkMesaGLContext.h
@@ -18,8 +18,6 @@
 
 public:
     ~SkMesaGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
     static SkMesaGLContext* Create(GrGLStandard forcedGpuAPI) {
         if (kGLES_GrGLStandard == forcedGpuAPI) {
@@ -37,6 +35,10 @@
     SkMesaGLContext();
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char*) const override;
+
     Context fContext;
     GrGLubyte *fImage;
 };
diff --git a/src/gpu/gl/win/SkCreatePlatformGLContext_win.cpp b/src/gpu/gl/win/SkCreatePlatformGLContext_win.cpp
index d387ef4..8a4c5db 100644
--- a/src/gpu/gl/win/SkCreatePlatformGLContext_win.cpp
+++ b/src/gpu/gl/win/SkCreatePlatformGLContext_win.cpp
@@ -21,12 +21,14 @@
 public:
     WinGLContext(GrGLStandard forcedGpuAPI);
 	~WinGLContext() override;
-    void makeCurrent() const override;
-    void swapBuffers() const override;
 
 private:
     void destroyGLContext();
 
+    void onPlatformMakeCurrent() const override;
+    void onPlatformSwapBuffers() const override;
+    GrGLFuncPtr onPlatformGetProcAddress(const char* name) const override;
+
     HWND fWindow;
     HDC fDeviceContext;
     HGLRC fGlRenderContext;
@@ -113,25 +115,27 @@
         return;
     }
 
-    fGL.reset(GrGLCreateNativeInterface());
-    if (NULL == fGL.get()) {
+    SkAutoTUnref<const GrGLInterface> gl(GrGLCreateNativeInterface());
+    if (NULL == gl.get()) {
         SkDebugf("Could not create GL interface.\n");
         this->destroyGLContext();
         return;
     }
-    if (!fGL->validate()) {
+    if (!gl->validate()) {
         SkDebugf("Could not validate GL interface.\n");
         this->destroyGLContext();
         return;
     }
+
+    this->init(gl.detach());
 }
 
 WinGLContext::~WinGLContext() {
+    this->teardown();
     this->destroyGLContext();
 }
 
 void WinGLContext::destroyGLContext() {
-    fGL.reset(NULL);
     SkSafeSetNull(fPbufferContext);
     if (fGlRenderContext) {
         wglDeleteContext(fGlRenderContext);
@@ -147,7 +151,7 @@
     }
 }
 
-void WinGLContext::makeCurrent() const {
+void WinGLContext::onPlatformMakeCurrent() const {
     HDC dc;
     HGLRC glrc;
 
@@ -164,7 +168,7 @@
     }
 }
 
-void WinGLContext::swapBuffers() const {
+void WinGLContext::onPlatformSwapBuffers() const {
     HDC dc;
 
     if (NULL == fPbufferContext) {
@@ -177,6 +181,10 @@
     }
 }
 
+GrGLFuncPtr WinGLContext::onPlatformGetProcAddress(const char* name) const {
+    return reinterpret_cast<GrGLFuncPtr>(wglGetProcAddress(name));
+}
+
 } // anonymous namespace
 
 SkGLContext* SkCreatePlatformGLContext(GrGLStandard forcedGpuAPI) {