Re-land "Implement EGL_experimental_present_path_angle"

- Re-land with clang fix.

This allows ANGLE to render directly onto a D3D swapchain in the correct
orientation when using the D3D11 renderer.

The trick is to add an extra uniform to each shader which takes either
the value +1.0 or -1.0. When rendering to a texture, ANGLE sets this
value to -1.0. When rendering to the default framebuffer, ANGLE sets
this value to +1.0. ANGLE multiplies vertex positions by this value in
the VS to invert rendering when appropriate. It also corrects other
state (e.g. viewport/scissor rect) and shader built-in values
(e.g. gl_FragCoord).

This saves a substantial amount of GPU time and lowers power
consumption. For example, the old method (where ANGLE renders all
content onto an offscreen texture, and then copies/inverts this onto the
swapchain at eglSwapBuffers() time) uses about 20% of the GPU each frame
on a Lumia 630.

Verification:
+ dEQP GL ES2 tests pass when "present path fast" is enabled
+ all ANGLE_end2end_tests pass when "present path fast" is enabled

BUG=angleproject:1219

Change-Id: I56b339897828753a616d7bae837a2f354dba9c63
Reviewed-on: https://chromium-review.googlesource.com/326730
Tryjob-Request: Austin Kinross <aukinros@microsoft.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libANGLE/renderer/d3d/DynamicHLSL.cpp b/src/libANGLE/renderer/d3d/DynamicHLSL.cpp
index 1d07b95..42a534f 100644
--- a/src/libANGLE/renderer/d3d/DynamicHLSL.cpp
+++ b/src/libANGLE/renderer/d3d/DynamicHLSL.cpp
@@ -412,6 +412,9 @@
     const ShaderD3D *fragmentShader    = GetImplAs<ShaderD3D>(fragmentShaderGL);
     const int shaderModel              = mRenderer->getMajorShaderModel();
 
+    // usesViewScale() isn't supported in the D3D9 renderer
+    ASSERT(shaderModel >= 4 || !programMetadata.usesViewScale());
+
     bool useInstancedPointSpriteEmulation =
         programMetadata.usesPointSize() &&
         mRenderer->getWorkarounds().useInstancedPointSpriteEmulation;
@@ -464,18 +467,43 @@
     // On D3D9 or D3D11 Feature Level 9, we need to emulate large viewports using dx_ViewAdjust.
     if (shaderModel >= 4 && mRenderer->getShaderModelSuffix() == "")
     {
-        vertexStream << "    output.dx_Position.x = gl_Position.x;\n"
-                     << "    output.dx_Position.y = -gl_Position.y;\n"
-                     << "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
+        vertexStream << "    output.dx_Position.x = gl_Position.x;\n";
+
+        if (programMetadata.usesViewScale())
+        {
+            // This code assumes that dx_ViewScale.y = -1.0f when rendering to texture, and +1.0f
+            // when rendering to the default framebuffer. No other values are valid.
+            vertexStream << "    output.dx_Position.y = dx_ViewScale.y * gl_Position.y;\n";
+        }
+        else
+        {
+            vertexStream << "    output.dx_Position.y = - gl_Position.y;\n";
+        }
+
+        vertexStream << "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
                      << "    output.dx_Position.w = gl_Position.w;\n";
     }
     else
     {
         vertexStream << "    output.dx_Position.x = gl_Position.x * dx_ViewAdjust.z + "
-                        "dx_ViewAdjust.x * gl_Position.w;\n"
-                     << "    output.dx_Position.y = -(gl_Position.y * dx_ViewAdjust.w + "
-                        "dx_ViewAdjust.y * gl_Position.w);\n"
-                     << "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
+                        "dx_ViewAdjust.x * gl_Position.w;\n";
+
+        // If usesViewScale() is true and we're using the D3D11 renderer via Feature Level 9_*,
+        // then we need to multiply the gl_Position.y by the viewScale.
+        // usesViewScale() isn't supported when using the D3D9 renderer.
+        if (programMetadata.usesViewScale() &&
+            (shaderModel >= 4 && mRenderer->getShaderModelSuffix() != ""))
+        {
+            vertexStream << "    output.dx_Position.y = dx_ViewScale.y * (gl_Position.y * "
+                            "dx_ViewAdjust.w + dx_ViewAdjust.y * gl_Position.w);\n";
+        }
+        else
+        {
+            vertexStream << "    output.dx_Position.y = -(gl_Position.y * dx_ViewAdjust.w + "
+                            "dx_ViewAdjust.y * gl_Position.w);\n";
+        }
+
+        vertexStream << "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
                      << "    output.dx_Position.w = gl_Position.w;\n";
     }
 
@@ -523,11 +551,26 @@
     if (useInstancedPointSpriteEmulation)
     {
         vertexStream << "\n"
-                     << "    gl_PointSize = clamp(gl_PointSize, minPointSize, maxPointSize);\n"
-                     << "    output.dx_Position.xyz += float3(input.spriteVertexPos.x * "
-                        "gl_PointSize / (dx_ViewCoords.x*2), input.spriteVertexPos.y * "
-                        "gl_PointSize / (dx_ViewCoords.y*2), input.spriteVertexPos.z) * "
-                        "output.dx_Position.w;\n";
+                     << "    gl_PointSize = clamp(gl_PointSize, minPointSize, maxPointSize);\n";
+
+        vertexStream << "    output.dx_Position.x += (input.spriteVertexPos.x * gl_PointSize / "
+                        "(dx_ViewCoords.x*2)) * output.dx_Position.w;";
+
+        if (programMetadata.usesViewScale())
+        {
+            // Multiply by ViewScale to invert the rendering when appropriate
+            vertexStream << "    output.dx_Position.y += (-dx_ViewScale.y * "
+                            "input.spriteVertexPos.y * gl_PointSize / (dx_ViewCoords.y*2)) * "
+                            "output.dx_Position.w;";
+        }
+        else
+        {
+            vertexStream << "    output.dx_Position.y += (input.spriteVertexPos.y * gl_PointSize / "
+                            "(dx_ViewCoords.y*2)) * output.dx_Position.w;";
+        }
+
+        vertexStream
+            << "    output.dx_Position.z += input.spriteVertexPos.z * output.dx_Position.w;\n";
 
         if (programMetadata.usesPointCoord())
         {
@@ -606,6 +649,49 @@
                            "dx_ViewCoords.w;\n";
         }
 
+        if (programMetadata.usesViewScale())
+        {
+            // For Feature Level 9_3 and below, we need to correct gl_FragCoord.y to account
+            // for dx_ViewScale. On Feature Level 10_0+, gl_FragCoord.y is calculated above using
+            // dx_ViewCoords and is always correct irrespective of dx_ViewScale's value.
+            // NOTE: usesViewScale() can only be true on D3D11 (i.e. Shader Model 4.0+).
+            if (shaderModel >= 4 && mRenderer->getShaderModelSuffix() == "")
+            {
+                // Some assumptions:
+                //  - dx_ViewScale.y = -1.0f when rendering to texture
+                //  - dx_ViewScale.y = +1.0f when rendering to the default framebuffer
+                //  - gl_FragCoord.y has been set correctly above.
+                //
+                // When rendering to the backbuffer, the code inverts gl_FragCoord's y coordinate.
+                // This involves subtracting the y coordinate from the height of the area being
+                // rendered to.
+                //
+                // First we calculate the height of the area being rendered to:
+                //    render_area_height = (2.0f / (1.0f - input.gl_FragCoord.y * rhw)) *
+                //    gl_FragCoord.y
+                //
+                // Note that when we're rendering to default FB, we want our output to be
+                // equivalent to:
+                //    "gl_FragCoord.y = render_area_height - gl_FragCoord.y"
+                //
+                // When we're rendering to a texture, we want our output to be equivalent to:
+                //    "gl_FragCoord.y = gl_FragCoord.y;"
+                //
+                // If we set scale_factor = ((1.0f + dx_ViewScale.y) / 2.0f), then notice that
+                //  - When rendering to default FB: scale_factor = 1.0f
+                //  - When rendering to texture:    scale_factor = 0.0f
+                //
+                // Therefore, we can get our desired output by setting:
+                //    "gl_FragCoord.y = scale_factor * render_area_height - dx_ViewScale.y *
+                //    gl_FragCoord.y"
+                //
+                // Simplifying, this becomes:
+                pixelStream
+                    << "    gl_FragCoord.y = (1.0f + dx_ViewScale.y) * gl_FragCoord.y /"
+                       "(1.0f - input.gl_FragCoord.y * rhw)  - dx_ViewScale.y * gl_FragCoord.y;\n";
+            }
+        }
+
         pixelStream << "    gl_FragCoord.z = (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + "
                        "dx_DepthFront.y;\n"
                     << "    gl_FragCoord.w = rhw;\n";
@@ -749,6 +835,7 @@
 std::string DynamicHLSL::generateGeometryShaderHLSL(gl::PrimitiveType primitiveType,
                                                     const gl::Data &data,
                                                     const gl::Program::Data &programData,
+                                                    const bool useViewScale,
                                                     const std::string &preambleString) const
 {
     ASSERT(mRenderer->getMajorShaderModel() >= 4);
@@ -799,8 +886,14 @@
     {
         shaderStream << "#define ANGLE_POINT_SPRITE_SHADER\n"
                         "\n"
-                        "uniform float4 dx_ViewCoords : register(c1);\n"
-                        "\n"
+                        "uniform float4 dx_ViewCoords : register(c1);\n";
+
+        if (useViewScale)
+        {
+            shaderStream << "uniform float2 dx_ViewScale : register(c3);\n";
+        }
+
+        shaderStream << "\n"
                         "static float2 pointSpriteCorners[] = \n"
                         "{\n"
                         "    float2( 0.5f, -0.5f),\n"
@@ -870,9 +963,20 @@
 
         for (int corner = 0; corner < 4; corner++)
         {
-            shaderStream << "\n"
-                            "    output.dx_Position = dx_Position + float4(pointSpriteCorners["
-                         << corner << "] * viewportScale * gl_PointSize, 0.0f, 0.0f);\n";
+            if (useViewScale)
+            {
+                shaderStream << "    \n"
+                                "    output.dx_Position = dx_Position + float4(1.0f, "
+                                "-dx_ViewScale.y, 1.0f, 1.0f)"
+                                "        * float4(pointSpriteCorners["
+                             << corner << "] * viewportScale * gl_PointSize, 0.0f, 0.0f);\n";
+            }
+            else
+            {
+                shaderStream << "\n"
+                                "    output.dx_Position = dx_Position + float4(pointSpriteCorners["
+                             << corner << "] * viewportScale * gl_PointSize, 0.0f, 0.0f);\n";
+            }
 
             if (usesPointCoord)
             {
diff --git a/src/libANGLE/renderer/d3d/DynamicHLSL.h b/src/libANGLE/renderer/d3d/DynamicHLSL.h
index df52d24..69d941c 100644
--- a/src/libANGLE/renderer/d3d/DynamicHLSL.h
+++ b/src/libANGLE/renderer/d3d/DynamicHLSL.h
@@ -74,6 +74,7 @@
     std::string generateGeometryShaderHLSL(gl::PrimitiveType primitiveType,
                                            const gl::Data &data,
                                            const gl::Program::Data &programData,
+                                           const bool useViewScale,
                                            const std::string &preambleString) const;
 
     void getPixelShaderOutputKey(const gl::Data &data,
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
index bde3409..37a288e 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
@@ -401,11 +401,13 @@
 ProgramD3DMetadata::ProgramD3DMetadata(int rendererMajorShaderModel,
                                        const std::string &shaderModelSuffix,
                                        bool usesInstancedPointSpriteEmulation,
+                                       bool usesViewScale,
                                        const ShaderD3D *vertexShader,
                                        const ShaderD3D *fragmentShader)
     : mRendererMajorShaderModel(rendererMajorShaderModel),
       mShaderModelSuffix(shaderModelSuffix),
       mUsesInstancedPointSpriteEmulation(usesInstancedPointSpriteEmulation),
+      mUsesViewScale(usesViewScale),
       mVertexShader(vertexShader),
       mFragmentShader(fragmentShader)
 {
@@ -446,6 +448,11 @@
     return !usesPointSize() && usesPointCoord() && mRendererMajorShaderModel >= 4;
 }
 
+bool ProgramD3DMetadata::usesViewScale() const
+{
+    return mUsesViewScale;
+}
+
 bool ProgramD3DMetadata::addsPointCoordToVertexShader() const
 {
     // Instanced PointSprite emulation requires that gl_PointCoord is present in the vertex shader
@@ -1259,7 +1266,8 @@
     }
 
     std::string geometryHLSL = mDynamicHLSL->generateGeometryShaderHLSL(
-        geometryShaderType, data, mData, mGeometryShaderPreamble);
+        geometryShaderType, data, mData, mRenderer->presentPathFastEnabled(),
+        mGeometryShaderPreamble);
 
     gl::InfoLog tempInfoLog;
     gl::InfoLog *currentInfoLog = infoLog ? infoLog : &tempInfoLog;
@@ -1387,7 +1395,8 @@
     }
 
     ProgramD3DMetadata metadata(mRenderer->getMajorShaderModel(), mRenderer->getShaderModelSuffix(),
-                                usesInstancedPointSpriteEmulation(), vertexShaderD3D,
+                                usesInstancedPointSpriteEmulation(),
+                                mRenderer->presentPathFastEnabled(), vertexShaderD3D,
                                 fragmentShaderD3D);
 
     varyingPacking.enableBuiltins(SHADER_VERTEX, metadata);
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.h b/src/libANGLE/renderer/d3d/ProgramD3D.h
index 240cea1..3dfe52d 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.h
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.h
@@ -103,6 +103,7 @@
     ProgramD3DMetadata(int rendererMajorShaderModel,
                        const std::string &shaderModelSuffix,
                        bool usesInstancedPointSpriteEmulation,
+                       bool usesViewScale,
                        const ShaderD3D *vertexShader,
                        const ShaderD3D *fragmentShader);
 
@@ -113,6 +114,7 @@
     bool usesFragCoord() const;
     bool usesPointSize() const;
     bool usesInsertedPointCoordValue() const;
+    bool usesViewScale() const;
     bool addsPointCoordToVertexShader() const;
     bool usesTransformFeedbackGLPosition() const;
     bool usesSystemValuePointSize() const;
@@ -124,6 +126,7 @@
     const int mRendererMajorShaderModel;
     const std::string mShaderModelSuffix;
     const bool mUsesInstancedPointSpriteEmulation;
+    const bool mUsesViewScale;
     const ShaderD3D *mVertexShader;
     const ShaderD3D *mFragmentShader;
 };
diff --git a/src/libANGLE/renderer/d3d/RendererD3D.cpp b/src/libANGLE/renderer/d3d/RendererD3D.cpp
index 11a6860..7dfb118 100644
--- a/src/libANGLE/renderer/d3d/RendererD3D.cpp
+++ b/src/libANGLE/renderer/d3d/RendererD3D.cpp
@@ -41,6 +41,7 @@
     : mDisplay(display),
       mDeviceLost(false),
       mAnnotator(nullptr),
+      mPresentPathFastEnabled(false),
       mScratchMemoryBufferResetCounter(0),
       mWorkaroundsInitialized(false),
       mDisjoint(false)
diff --git a/src/libANGLE/renderer/d3d/RendererD3D.h b/src/libANGLE/renderer/d3d/RendererD3D.h
index b9d050e..32ccf28 100644
--- a/src/libANGLE/renderer/d3d/RendererD3D.h
+++ b/src/libANGLE/renderer/d3d/RendererD3D.h
@@ -251,6 +251,8 @@
 
     virtual egl::Error getEGLDevice(DeviceImpl **device) = 0;
 
+    bool presentPathFastEnabled() const { return mPresentPathFastEnabled; }
+
   protected:
     virtual bool getLUID(LUID *adapterLuid) const = 0;
     virtual gl::Error applyShadersImpl(const gl::Data &data, GLenum drawMode) = 0;
@@ -270,6 +272,8 @@
 
     std::vector<TranslatedAttribute> mTranslatedAttribCache;
 
+    bool mPresentPathFastEnabled;
+
   private:
     gl::Error genericDrawArrays(const gl::Data &data,
                                 GLenum mode,
@@ -329,20 +333,6 @@
     bool mDisjoint;
 };
 
-struct dx_VertexConstants
-{
-    float depthRange[4];
-    float viewAdjust[4];
-    float viewCoords[4];
-};
-
-struct dx_PixelConstants
-{
-    float depthRange[4];
-    float viewCoords[4];
-    float depthFront[4];
-};
-
 }
 
 #endif // LIBANGLE_RENDERER_D3D_RENDERERD3D_H_
diff --git a/src/libANGLE/renderer/d3d/d3d11/Framebuffer11.cpp b/src/libANGLE/renderer/d3d/d3d11/Framebuffer11.cpp
index ad2360d..beffa30 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Framebuffer11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/Framebuffer11.cpp
@@ -93,7 +93,27 @@
 gl::Error Framebuffer11::clear(const gl::Data &data, const ClearParameters &clearParams)
 {
     Clear11 *clearer = mRenderer->getClearer();
-    gl::Error error = clearer->clearFramebuffer(clearParams, mData);
+    gl::Error error(GL_NO_ERROR);
+
+    const gl::FramebufferAttachment *colorAttachment = mData.getFirstColorAttachment();
+    if (clearParams.scissorEnabled == true && colorAttachment != nullptr &&
+        UsePresentPathFast(mRenderer, colorAttachment))
+    {
+        // If the current framebuffer is using the default colorbuffer, and present path fast is
+        // active, and the scissor rect is enabled, then we should invert the scissor rect
+        // vertically
+        ClearParameters presentPathFastClearParams = clearParams;
+        gl::Extents framebufferSize                = colorAttachment->getSize();
+        presentPathFastClearParams.scissor.y       = framebufferSize.height -
+                                               presentPathFastClearParams.scissor.y -
+                                               presentPathFastClearParams.scissor.height;
+        error = clearer->clearFramebuffer(presentPathFastClearParams, mData);
+    }
+    else
+    {
+        error = clearer->clearFramebuffer(clearParams, mData);
+    }
+
     if (error.isError())
     {
         return error;
@@ -328,8 +348,27 @@
                 }
                 ASSERT(drawRenderTarget);
 
-                error = mRenderer->blitRenderbufferRect(sourceArea, destArea, readRenderTarget, drawRenderTarget,
-                                                        filter, scissor, blitRenderTarget, false, false);
+                const bool invertColorSource   = UsePresentPathFast(mRenderer, readBuffer);
+                gl::Rectangle actualSourceArea = sourceArea;
+                if (invertColorSource)
+                {
+                    RenderTarget11 *readRenderTarget11 = GetAs<RenderTarget11>(readRenderTarget);
+                    actualSourceArea.y                 = readRenderTarget11->getHeight() - sourceArea.y;
+                    actualSourceArea.height            = -sourceArea.height;
+                }
+
+                const bool invertColorDest   = UsePresentPathFast(mRenderer, &drawBuffer);
+                gl::Rectangle actualDestArea = destArea;
+                if (invertColorDest)
+                {
+                    RenderTarget11 *drawRenderTarget11 = GetAs<RenderTarget11>(drawRenderTarget);
+                    actualDestArea.y                   = drawRenderTarget11->getHeight() - destArea.y;
+                    actualDestArea.height              = -destArea.height;
+                }
+
+                error = mRenderer->blitRenderbufferRect(actualSourceArea, actualDestArea,
+                                                        readRenderTarget, drawRenderTarget, filter,
+                                                        scissor, blitRenderTarget, false, false);
                 if (error.isError())
                 {
                     return error;
diff --git a/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp b/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
index 537af0e..d0f22e4 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
@@ -473,6 +473,10 @@
             default:
                 UNREACHABLE();
         }
+
+        const EGLenum presentPath = attributes.get(EGL_EXPERIMENTAL_PRESENT_PATH_ANGLE,
+                                                   EGL_EXPERIMENTAL_PRESENT_PATH_COPY_ANGLE);
+        mPresentPathFastEnabled = (presentPath == EGL_EXPERIMENTAL_PRESENT_PATH_FAST_ANGLE);
     }
     else if (display->getPlatform() == EGL_PLATFORM_DEVICE_EXT)
     {
@@ -483,6 +487,7 @@
         // Also set EGL_PLATFORM_ANGLE_ANGLE variables, in case they're used elsewhere in ANGLE
         // mAvailableFeatureLevels defaults to empty
         mRequestedDriverType = D3D_DRIVER_TYPE_UNKNOWN;
+        mPresentPathFastEnabled = false;
     }
 
     initializeDebugAnnotator();
@@ -871,14 +876,24 @@
 
 egl::ConfigSet Renderer11::generateConfigs() const
 {
-    static const GLenum colorBufferFormats[] = {
-        // 32-bit supported formats
-        GL_BGRA8_EXT, GL_RGBA8_OES,
-        // 24-bit supported formats
-        GL_RGB8_OES,
+    std::vector<GLenum> colorBufferFormats;
+
+    // 32-bit supported formats
+    colorBufferFormats.push_back(GL_BGRA8_EXT);
+    colorBufferFormats.push_back(GL_RGBA8_OES);
+
+    // 24-bit supported formats
+    colorBufferFormats.push_back(GL_RGB8_OES);
+
+    if (!mPresentPathFastEnabled)
+    {
         // 16-bit supported formats
-        GL_RGBA4, GL_RGB5_A1, GL_RGB565,
-    };
+        // These aren't valid D3D11 swapchain formats, so don't expose them as configs
+        // if present path fast is active
+        colorBufferFormats.push_back(GL_RGBA4);
+        colorBufferFormats.push_back(GL_RGB5_A1);
+        colorBufferFormats.push_back(GL_RGB565);
+    }
 
     static const GLenum depthStencilBufferFormats[] =
     {
@@ -890,67 +905,87 @@
     const gl::Caps &rendererCaps = getRendererCaps();
     const gl::TextureCapsMap &rendererTextureCaps = getRendererTextureCaps();
 
-    const EGLint optimalSurfaceOrientation = EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE;
+    const EGLint optimalSurfaceOrientation =
+        mPresentPathFastEnabled ? 0 : EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE;
 
     egl::ConfigSet configs;
-    for (size_t formatIndex = 0; formatIndex < ArraySize(colorBufferFormats); formatIndex++)
+    for (GLenum colorBufferInternalFormat : colorBufferFormats)
     {
-        GLenum colorBufferInternalFormat = colorBufferFormats[formatIndex];
         const gl::TextureCaps &colorBufferFormatCaps = rendererTextureCaps.get(colorBufferInternalFormat);
-        if (colorBufferFormatCaps.renderable)
+        if (!colorBufferFormatCaps.renderable)
         {
-            for (size_t depthStencilIndex = 0; depthStencilIndex < ArraySize(depthStencilBufferFormats); depthStencilIndex++)
+            continue;
+        }
+
+        for (GLenum depthStencilBufferInternalFormat : depthStencilBufferFormats)
+        {
+            const gl::TextureCaps &depthStencilBufferFormatCaps =
+                rendererTextureCaps.get(depthStencilBufferInternalFormat);
+            if (!depthStencilBufferFormatCaps.renderable &&
+                depthStencilBufferInternalFormat != GL_NONE)
             {
-                GLenum depthStencilBufferInternalFormat = depthStencilBufferFormats[depthStencilIndex];
-                const gl::TextureCaps &depthStencilBufferFormatCaps = rendererTextureCaps.get(depthStencilBufferInternalFormat);
-                if (depthStencilBufferFormatCaps.renderable || depthStencilBufferInternalFormat == GL_NONE)
-                {
-                    const gl::InternalFormat &colorBufferFormatInfo = gl::GetInternalFormatInfo(colorBufferInternalFormat);
-                    const gl::InternalFormat &depthStencilBufferFormatInfo = gl::GetInternalFormatInfo(depthStencilBufferInternalFormat);
-
-                    egl::Config config;
-                    config.renderTargetFormat = colorBufferInternalFormat;
-                    config.depthStencilFormat = depthStencilBufferInternalFormat;
-                    config.bufferSize = colorBufferFormatInfo.pixelBytes * 8;
-                    config.redSize = colorBufferFormatInfo.redBits;
-                    config.greenSize = colorBufferFormatInfo.greenBits;
-                    config.blueSize = colorBufferFormatInfo.blueBits;
-                    config.luminanceSize = colorBufferFormatInfo.luminanceBits;
-                    config.alphaSize = colorBufferFormatInfo.alphaBits;
-                    config.alphaMaskSize = 0;
-                    config.bindToTextureRGB = (colorBufferFormatInfo.format == GL_RGB);
-                    config.bindToTextureRGBA = (colorBufferFormatInfo.format == GL_RGBA || colorBufferFormatInfo.format == GL_BGRA_EXT);
-                    config.colorBufferType = EGL_RGB_BUFFER;
-                    config.configCaveat = EGL_NONE;
-                    config.configID = static_cast<EGLint>(configs.size() + 1);
-                    // Can only support a conformant ES2 with feature level greater than 10.0.
-                    config.conformant = (mRenderer11DeviceCaps.featureLevel >= D3D_FEATURE_LEVEL_10_0) ? (EGL_OPENGL_ES2_BIT | EGL_OPENGL_ES3_BIT_KHR) : 0;
-                    config.depthSize = depthStencilBufferFormatInfo.depthBits;
-                    config.level = 0;
-                    config.matchNativePixmap = EGL_NONE;
-                    config.maxPBufferWidth = rendererCaps.max2DTextureSize;
-                    config.maxPBufferHeight = rendererCaps.max2DTextureSize;
-                    config.maxPBufferPixels = rendererCaps.max2DTextureSize * rendererCaps.max2DTextureSize;
-                    config.maxSwapInterval = 4;
-                    config.minSwapInterval = 0;
-                    config.nativeRenderable = EGL_FALSE;
-                    config.nativeVisualID = 0;
-                    config.nativeVisualType = EGL_NONE;
-                    // Can't support ES3 at all without feature level 10.0
-                    config.renderableType = EGL_OPENGL_ES2_BIT | ((mRenderer11DeviceCaps.featureLevel >= D3D_FEATURE_LEVEL_10_0) ? EGL_OPENGL_ES3_BIT_KHR : 0);
-                    config.sampleBuffers = 0; // FIXME: enumerate multi-sampling
-                    config.samples = 0;
-                    config.stencilSize = depthStencilBufferFormatInfo.stencilBits;
-                    config.surfaceType = EGL_PBUFFER_BIT | EGL_WINDOW_BIT | EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
-                    config.transparentType = EGL_NONE;
-                    config.transparentRedValue = 0;
-                    config.transparentGreenValue = 0;
-                    config.transparentBlueValue = 0;
-                    config.optimalOrientation    = optimalSurfaceOrientation;
-
-                    configs.add(config);
-                }
+                continue;
             }
+
+            const gl::InternalFormat &colorBufferFormatInfo =
+                gl::GetInternalFormatInfo(colorBufferInternalFormat);
+            const gl::InternalFormat &depthStencilBufferFormatInfo =
+                gl::GetInternalFormatInfo(depthStencilBufferInternalFormat);
+
+            egl::Config config;
+            config.renderTargetFormat = colorBufferInternalFormat;
+            config.depthStencilFormat = depthStencilBufferInternalFormat;
+            config.bufferSize         = colorBufferFormatInfo.pixelBytes * 8;
+            config.redSize            = colorBufferFormatInfo.redBits;
+            config.greenSize          = colorBufferFormatInfo.greenBits;
+            config.blueSize           = colorBufferFormatInfo.blueBits;
+            config.luminanceSize      = colorBufferFormatInfo.luminanceBits;
+            config.alphaSize          = colorBufferFormatInfo.alphaBits;
+            config.alphaMaskSize      = 0;
+            config.bindToTextureRGB   = (colorBufferFormatInfo.format == GL_RGB);
+            config.bindToTextureRGBA = (colorBufferFormatInfo.format == GL_RGBA ||
+                                        colorBufferFormatInfo.format == GL_BGRA_EXT);
+            config.colorBufferType = EGL_RGB_BUFFER;
+            config.configCaveat    = EGL_NONE;
+            config.configID        = static_cast<EGLint>(configs.size() + 1);
+            // Can only support a conformant ES2 with feature level greater than 10.0.
+            config.conformant = (mRenderer11DeviceCaps.featureLevel >= D3D_FEATURE_LEVEL_10_0)
+                                    ? (EGL_OPENGL_ES2_BIT | EGL_OPENGL_ES3_BIT_KHR)
+                                    : 0;
+
+            // PresentPathFast may not be conformant
+            if (mPresentPathFastEnabled)
+            {
+                config.conformant = 0;
+            }
+
+            config.depthSize         = depthStencilBufferFormatInfo.depthBits;
+            config.level             = 0;
+            config.matchNativePixmap = EGL_NONE;
+            config.maxPBufferWidth   = rendererCaps.max2DTextureSize;
+            config.maxPBufferHeight  = rendererCaps.max2DTextureSize;
+            config.maxPBufferPixels  = rendererCaps.max2DTextureSize * rendererCaps.max2DTextureSize;
+            config.maxSwapInterval   = 4;
+            config.minSwapInterval   = 0;
+            config.nativeRenderable  = EGL_FALSE;
+            config.nativeVisualID    = 0;
+            config.nativeVisualType  = EGL_NONE;
+            // Can't support ES3 at all without feature level 10.0
+            config.renderableType =
+                EGL_OPENGL_ES2_BIT | ((mRenderer11DeviceCaps.featureLevel >= D3D_FEATURE_LEVEL_10_0)
+                                          ? EGL_OPENGL_ES3_BIT_KHR
+                                          : 0);
+            config.sampleBuffers         = 0;  // FIXME: enumerate multi-sampling
+            config.samples               = 0;
+            config.stencilSize           = depthStencilBufferFormatInfo.stencilBits;
+            config.surfaceType           = EGL_PBUFFER_BIT | EGL_WINDOW_BIT | EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
+            config.transparentType       = EGL_NONE;
+            config.transparentRedValue   = 0;
+            config.transparentGreenValue = 0;
+            config.transparentBlueValue  = 0;
+            config.optimalOrientation    = optimalSurfaceOrientation;
+
+            configs.add(config);
         }
     }
 
@@ -971,7 +1006,9 @@
     outExtensions->keyedMutex = true;
     outExtensions->querySurfacePointer = true;
     outExtensions->windowFixedSize     = true;
-    outExtensions->surfaceOrientation  = true;
+
+    // If present path fast is active then the surface orientation extension isn't supported
+    outExtensions->surfaceOrientation = !mPresentPathFastEnabled;
 
     // D3D11 does not support present with dirty rectangles until DXGI 1.2.
     outExtensions->postSubBuffer = mRenderer11DeviceCaps.supportsDXGI1_2;
@@ -1352,6 +1389,12 @@
         return error;
     }
 
+    // Set the present path state
+    const bool presentPathFastActive =
+        UsePresentPathFast(this, framebufferObject->getFirstColorbuffer());
+    mStateManager.updatePresentPath(presentPathFastActive,
+                                    framebufferObject->getFirstColorbuffer());
+
     // Setting viewport state
     mStateManager.setViewport(data.caps, data.state->getViewport(), data.state->getNearPlane(),
                               data.state->getFarPlane());
@@ -2097,7 +2140,7 @@
     if (!mDriverConstantBufferVS)
     {
         D3D11_BUFFER_DESC constantBufferDescription = {0};
-        constantBufferDescription.ByteWidth = sizeof(dx_VertexConstants);
+        constantBufferDescription.ByteWidth           = sizeof(dx_VertexConstants11);
         constantBufferDescription.Usage = D3D11_USAGE_DEFAULT;
         constantBufferDescription.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
         constantBufferDescription.CPUAccessFlags = 0;
@@ -2116,7 +2159,7 @@
     if (!mDriverConstantBufferPS)
     {
         D3D11_BUFFER_DESC constantBufferDescription = {0};
-        constantBufferDescription.ByteWidth = sizeof(dx_PixelConstants);
+        constantBufferDescription.ByteWidth           = sizeof(dx_PixelConstants11);
         constantBufferDescription.Usage = D3D11_USAGE_DEFAULT;
         constantBufferDescription.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
         constantBufferDescription.CPUAccessFlags = 0;
@@ -2132,27 +2175,27 @@
         mDeviceContext->PSSetConstantBuffers(1, 1, &mDriverConstantBufferPS);
     }
 
-    const dx_VertexConstants &vertexConstants = mStateManager.getVertexConstants();
-    if (memcmp(&vertexConstants, &mAppliedVertexConstants, sizeof(dx_VertexConstants)) != 0)
+    const dx_VertexConstants11 &vertexConstants = mStateManager.getVertexConstants();
+    if (memcmp(&vertexConstants, &mAppliedVertexConstants, sizeof(dx_VertexConstants11)) != 0)
     {
         ASSERT(mDriverConstantBufferVS != nullptr);
         if (mDriverConstantBufferVS)
         {
             mDeviceContext->UpdateSubresource(mDriverConstantBufferVS, 0, NULL, &vertexConstants,
                                               16, 0);
-            memcpy(&mAppliedVertexConstants, &vertexConstants, sizeof(dx_VertexConstants));
+            memcpy(&mAppliedVertexConstants, &vertexConstants, sizeof(dx_VertexConstants11));
         }
     }
 
-    const dx_PixelConstants &pixelConstants = mStateManager.getPixelConstants();
-    if (memcmp(&pixelConstants, &mAppliedPixelConstants, sizeof(dx_PixelConstants)) != 0)
+    const dx_PixelConstants11 &pixelConstants = mStateManager.getPixelConstants();
+    if (memcmp(&pixelConstants, &mAppliedPixelConstants, sizeof(dx_PixelConstants11)) != 0)
     {
         ASSERT(mDriverConstantBufferPS != nullptr);
         if (mDriverConstantBufferPS)
         {
             mDeviceContext->UpdateSubresource(mDriverConstantBufferPS, 0, NULL, &pixelConstants, 16,
                                               0);
-            memcpy(&mAppliedPixelConstants, &pixelConstants, sizeof(dx_PixelConstants));
+            memcpy(&mAppliedPixelConstants, &pixelConstants, sizeof(dx_PixelConstants11));
         }
     }
 
@@ -2206,8 +2249,8 @@
         mAppliedTFOffsets[i] = 0;
     }
 
-    memset(&mAppliedVertexConstants, 0, sizeof(dx_VertexConstants));
-    memset(&mAppliedPixelConstants, 0, sizeof(dx_PixelConstants));
+    memset(&mAppliedVertexConstants, 0, sizeof(dx_VertexConstants11));
+    memset(&mAppliedPixelConstants, 0, sizeof(dx_PixelConstants11));
 
     mInputLayoutCache.markDirty();
 
@@ -2602,6 +2645,13 @@
     gl::Box sourceArea(sourceRect.x, sourceRect.y, 0, sourceRect.width, sourceRect.height, 1);
     gl::Extents sourceSize(sourceRenderTarget->getWidth(), sourceRenderTarget->getHeight(), 1);
 
+    const bool invertSource = UsePresentPathFast(this, colorbuffer);
+    if (invertSource)
+    {
+        sourceArea.y      = sourceSize.height - sourceRect.y;
+        sourceArea.height = -sourceArea.height;
+    }
+
     gl::Box destArea(destOffset.x, destOffset.y, 0, sourceRect.width, sourceRect.height, 1);
     gl::Extents destSize(destRenderTarget->getWidth(), destRenderTarget->getHeight(), 1);
 
@@ -2654,6 +2704,13 @@
     gl::Box sourceArea(sourceRect.x, sourceRect.y, 0, sourceRect.width, sourceRect.height, 1);
     gl::Extents sourceSize(sourceRenderTarget->getWidth(), sourceRenderTarget->getHeight(), 1);
 
+    const bool invertSource = UsePresentPathFast(this, colorbuffer);
+    if (invertSource)
+    {
+        sourceArea.y      = sourceSize.height - sourceRect.y;
+        sourceArea.height = -sourceArea.height;
+    }
+
     gl::Box destArea(destOffset.x, destOffset.y, 0, sourceRect.width, sourceRect.height, 1);
     gl::Extents destSize(destRenderTarget->getWidth(), destRenderTarget->getHeight(), 1);
 
@@ -3333,6 +3390,8 @@
     ASSERT(sourceArea.width >= 0);
     ASSERT(sourceArea.height >= 0);
 
+    const bool invertTexture = UsePresentPathFast(this, &srcAttachment);
+
     RenderTargetD3D *renderTarget = nullptr;
     gl::Error error = srcAttachment.getRenderTarget(&renderTarget);
     if (error.isError())
@@ -3348,15 +3407,21 @@
 
     const gl::Extents &texSize = textureHelper.getExtents();
 
+    gl::Rectangle actualArea = sourceArea;
+    if (invertTexture)
+    {
+        actualArea.y = texSize.height - actualArea.y - actualArea.height;
+    }
+
     // Clamp read region to the defined texture boundaries, preventing out of bounds reads
     // and reads of uninitialized data.
     gl::Rectangle safeArea;
-    safeArea.x = gl::clamp(sourceArea.x, 0, texSize.width);
-    safeArea.y = gl::clamp(sourceArea.y, 0, texSize.height);
+    safeArea.x = gl::clamp(actualArea.x, 0, texSize.width);
+    safeArea.y = gl::clamp(actualArea.y, 0, texSize.height);
     safeArea.width =
-        gl::clamp(sourceArea.width + std::min(sourceArea.x, 0), 0, texSize.width - safeArea.x);
+        gl::clamp(actualArea.width + std::min(actualArea.x, 0), 0, texSize.width - safeArea.x);
     safeArea.height =
-        gl::clamp(sourceArea.height + std::min(sourceArea.y, 0), 0, texSize.height - safeArea.y);
+        gl::clamp(actualArea.height + std::min(actualArea.y, 0), 0, texSize.height - safeArea.y);
 
     ASSERT(safeArea.x >= 0 && safeArea.y >= 0);
     ASSERT(safeArea.x + safeArea.width <= texSize.width);
@@ -3433,8 +3498,30 @@
     mDeviceContext->CopySubresourceRegion(stagingHelper.getResource(), 0, 0, 0, 0,
                                           srcTexture->getResource(), sourceSubResource, &srcBox);
 
-    PackPixelsParams packParams(safeArea, format, type, outputPitch, pack, 0);
-    return packPixels(stagingHelper, packParams, pixelsOut);
+    if (invertTexture)
+    {
+        gl::PixelPackState invertTexturePack;
+
+        // Create a new PixelPackState with reversed row order. Note that we can't just assign
+        // 'invertTexturePack' to be 'pack' (or memcpy) since that breaks the ref counting/object
+        // tracking in the 'pixelBuffer' members, causing leaks. Instead we must use
+        // pixelBuffer.set() twice, which performs the addRef/release correctly
+        invertTexturePack.alignment = pack.alignment;
+        invertTexturePack.pixelBuffer.set(pack.pixelBuffer.get());
+        invertTexturePack.reverseRowOrder = !pack.reverseRowOrder;
+
+        PackPixelsParams packParams(safeArea, format, type, outputPitch, invertTexturePack, 0);
+        error = packPixels(stagingHelper, packParams, pixelsOut);
+
+        invertTexturePack.pixelBuffer.set(nullptr);
+
+        return error;
+    }
+    else
+    {
+        PackPixelsParams packParams(safeArea, format, type, outputPitch, pack, 0);
+        return packPixels(stagingHelper, packParams, pixelsOut);
+    }
 }
 
 gl::Error Renderer11::packPixels(const TextureHelper11 &textureHelper,
diff --git a/src/libANGLE/renderer/d3d/d3d11/Renderer11.h b/src/libANGLE/renderer/d3d/d3d11/Renderer11.h
index 8a53c80..ced90d3 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Renderer11.h
+++ b/src/libANGLE/renderer/d3d/d3d11/Renderer11.h
@@ -389,14 +389,14 @@
     uintptr_t mAppliedGeometryShader;
     uintptr_t mAppliedPixelShader;
 
-    dx_VertexConstants mAppliedVertexConstants;
+    dx_VertexConstants11 mAppliedVertexConstants;
     ID3D11Buffer *mDriverConstantBufferVS;
     ID3D11Buffer *mCurrentVertexConstantBuffer;
     unsigned int mCurrentConstantBufferVS[gl::IMPLEMENTATION_MAX_VERTEX_SHADER_UNIFORM_BUFFERS];
     GLintptr mCurrentConstantBufferVSOffset[gl::IMPLEMENTATION_MAX_VERTEX_SHADER_UNIFORM_BUFFERS];
     GLsizeiptr mCurrentConstantBufferVSSize[gl::IMPLEMENTATION_MAX_VERTEX_SHADER_UNIFORM_BUFFERS];
 
-    dx_PixelConstants mAppliedPixelConstants;
+    dx_PixelConstants11 mAppliedPixelConstants;
     ID3D11Buffer *mDriverConstantBufferPS;
     ID3D11Buffer *mCurrentPixelConstantBuffer;
     unsigned int mCurrentConstantBufferPS[gl::IMPLEMENTATION_MAX_FRAGMENT_SHADER_UNIFORM_BUFFERS];
diff --git a/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp b/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
index 68af21b..aa34fd4 100644
--- a/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
@@ -215,6 +215,23 @@
     }
 }
 
+void StateManager11::updatePresentPath(bool presentPathFastActive,
+                                       const gl::FramebufferAttachment *framebufferAttachment)
+{
+    const int colorBufferHeight =
+        framebufferAttachment ? framebufferAttachment->getSize().height : 0;
+
+    if ((mCurPresentPathFastEnabled != presentPathFastActive) ||
+        (presentPathFastActive && (colorBufferHeight != mCurPresentPathFastColorBufferHeight)))
+    {
+        mCurPresentPathFastEnabled           = presentPathFastActive;
+        mCurPresentPathFastColorBufferHeight = colorBufferHeight;
+        mViewportStateIsDirty                = true;  // Viewport may need to be vertically inverted
+        mScissorStateIsDirty                 = true;  // Scissor rect may need to be vertically inverted
+        mRasterizerStateIsDirty              = true;  // Cull Mode may need to be inverted
+    }
+}
+
 void StateManager11::syncState(const gl::State &state, const gl::State::DirtyBits &dirtyBits)
 {
     if (!dirtyBits.any())
@@ -571,8 +588,34 @@
     }
 
     ID3D11RasterizerState *dxRasterState = nullptr;
-    gl::Error error = mRenderer->getStateCache().getRasterizerState(rasterState, mCurScissorEnabled,
-                                                                    &dxRasterState);
+    gl::Error error(GL_NO_ERROR);
+
+    if (mCurPresentPathFastEnabled)
+    {
+        gl::RasterizerState modifiedRasterState = rasterState;
+
+        // If prseent path fast is active then we need invert the front face state.
+        // This ensures that both gl_FrontFacing is correct, and front/back culling
+        // is performed correctly.
+        if (modifiedRasterState.frontFace == GL_CCW)
+        {
+            modifiedRasterState.frontFace = GL_CW;
+        }
+        else
+        {
+            ASSERT(modifiedRasterState.frontFace == GL_CW);
+            modifiedRasterState.frontFace = GL_CCW;
+        }
+
+        error = mRenderer->getStateCache().getRasterizerState(modifiedRasterState,
+                                                              mCurScissorEnabled, &dxRasterState);
+    }
+    else
+    {
+        error = mRenderer->getStateCache().getRasterizerState(rasterState, mCurScissorEnabled,
+                                                              &dxRasterState);
+    }
+
     if (error.isError())
     {
         return error;
@@ -591,13 +634,19 @@
     if (!mScissorStateIsDirty)
         return;
 
+    int modifiedScissorY = scissor.y;
+    if (mCurPresentPathFastEnabled)
+    {
+        modifiedScissorY = mCurPresentPathFastColorBufferHeight - scissor.height - scissor.y;
+    }
+
     if (enabled)
     {
         D3D11_RECT rect;
         rect.left   = std::max(0, scissor.x);
-        rect.top    = std::max(0, scissor.y);
+        rect.top    = std::max(0, modifiedScissorY);
         rect.right  = scissor.x + std::max(0, scissor.width);
-        rect.bottom = scissor.y + std::max(0, scissor.height);
+        rect.bottom = modifiedScissorY + std::max(0, scissor.height);
 
         mRenderer->getDeviceContext()->RSSetScissorRects(1, &rect);
     }
@@ -639,7 +688,22 @@
 
     D3D11_VIEWPORT dxViewport;
     dxViewport.TopLeftX = static_cast<float>(dxViewportTopLeftX);
-    dxViewport.TopLeftY = static_cast<float>(dxViewportTopLeftY);
+
+    if (mCurPresentPathFastEnabled)
+    {
+        // When present path fast is active and we're rendering to framebuffer 0, we must invert
+        // the viewport in Y-axis.
+        // NOTE: We delay the inversion until right before the call to RSSetViewports, and leave
+        // dxViewportTopLeftY unchanged. This allows us to calculate viewAdjust below using the
+        // unaltered dxViewportTopLeftY value.
+        dxViewport.TopLeftY = static_cast<float>(mCurPresentPathFastColorBufferHeight -
+                                                 dxViewportTopLeftY - dxViewportHeight);
+    }
+    else
+    {
+        dxViewport.TopLeftY = static_cast<float>(dxViewportTopLeftY);
+    }
+
     dxViewport.Width    = static_cast<float>(dxViewportWidth);
     dxViewport.Height   = static_cast<float>(dxViewportHeight);
     dxViewport.MinDepth = actualZNear;
@@ -688,6 +752,16 @@
     mPixelConstants.depthRange[1] = actualZFar;
     mPixelConstants.depthRange[2] = actualZFar - actualZNear;
 
+    mPixelConstants.viewScale[0] = 1.0f;
+    mPixelConstants.viewScale[1] = mCurPresentPathFastEnabled ? 1.0f : -1.0f;
+    mPixelConstants.viewScale[2] = 1.0f;
+    mPixelConstants.viewScale[3] = 1.0f;
+
+    mVertexConstants.viewScale[0] = mPixelConstants.viewScale[0];
+    mVertexConstants.viewScale[1] = mPixelConstants.viewScale[1];
+    mVertexConstants.viewScale[2] = mPixelConstants.viewScale[2];
+    mVertexConstants.viewScale[3] = mPixelConstants.viewScale[3];
+
     mViewportStateIsDirty = false;
 }
 
diff --git a/src/libANGLE/renderer/d3d/d3d11/StateManager11.h b/src/libANGLE/renderer/d3d/d3d11/StateManager11.h
index e8f1e2b..f900882 100644
--- a/src/libANGLE/renderer/d3d/d3d11/StateManager11.h
+++ b/src/libANGLE/renderer/d3d/d3d11/StateManager11.h
@@ -24,6 +24,22 @@
 struct RenderTargetDesc;
 struct Renderer11DeviceCaps;
 
+struct dx_VertexConstants11
+{
+    float depthRange[4];
+    float viewAdjust[4];
+    float viewCoords[4];
+    float viewScale[4];
+};
+
+struct dx_PixelConstants11
+{
+    float depthRange[4];
+    float viewCoords[4];
+    float depthFront[4];
+    float viewScale[4];
+};
+
 class StateManager11 final : angle::NonCopyable
 {
   public:
@@ -46,8 +62,11 @@
 
     void setViewport(const gl::Caps *caps, const gl::Rectangle &viewport, float zNear, float zFar);
 
-    const dx_VertexConstants &getVertexConstants() const { return mVertexConstants; }
-    const dx_PixelConstants &getPixelConstants() const { return mPixelConstants; }
+    void updatePresentPath(bool presentPathFastActive,
+                           const gl::FramebufferAttachment *framebufferAttachment);
+
+    const dx_VertexConstants11 &getVertexConstants() const { return mVertexConstants; }
+    const dx_PixelConstants11 &getPixelConstants() const { return mPixelConstants; }
 
     void updateStencilSizeIfChanged(bool depthStencilInitialized, unsigned int stencilSize);
 
@@ -90,7 +109,7 @@
     Optional<bool> mCurDisableDepth;
     Optional<bool> mCurDisableStencil;
 
-    // Currenly applied rasterizer state
+    // Currently applied rasterizer state
     bool mRasterizerStateIsDirty;
     gl::RasterizerState mCurRasterState;
 
@@ -106,8 +125,8 @@
     float mCurFar;
 
     // Things needed in viewport state
-    dx_VertexConstants mVertexConstants;
-    dx_PixelConstants mPixelConstants;
+    dx_VertexConstants11 mVertexConstants;
+    dx_PixelConstants11 mPixelConstants;
 
     // Render target variables
     gl::Extents mViewportBounds;
diff --git a/src/libANGLE/renderer/d3d/d3d11/SwapChain11.cpp b/src/libANGLE/renderer/d3d/d3d11/SwapChain11.cpp
index ec68019..a56d3fa 100644
--- a/src/libANGLE/renderer/d3d/d3d11/SwapChain11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/SwapChain11.cpp
@@ -31,6 +31,17 @@
 namespace rx
 {
 
+namespace
+{
+bool NeedsOffscreenTexture(Renderer11 *renderer, NativeWindow nativeWindow, EGLint orientation)
+{
+    // We don't need an offscreen texture if either orientation = INVERT_Y,
+    // or present path fast is enabled and we're not rendering onto an offscreen surface.
+    return orientation != EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE &&
+           !(renderer->presentPathFastEnabled() && nativeWindow.getNativeWindow());
+}
+}  // anonymous namespace
+
 SwapChain11::SwapChain11(Renderer11 *renderer,
                          NativeWindow nativeWindow,
                          HANDLE shareHandle,
@@ -52,7 +63,7 @@
       mBackBufferTexture(nullptr),
       mBackBufferRTView(nullptr),
       mBackBufferSRView(nullptr),
-      mNeedsOffscreenTexture(orientation != EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE),
+      mNeedsOffscreenTexture(NeedsOffscreenTexture(renderer, nativeWindow, orientation)),
       mOffscreenTexture(nullptr),
       mOffscreenRTView(nullptr),
       mOffscreenSRView(nullptr),
@@ -68,6 +79,8 @@
       mColorRenderTarget(this, renderer, false),
       mDepthStencilRenderTarget(this, renderer, true)
 {
+    // Sanity check that if present path fast is active then we're using the default orientation
+    ASSERT(!mRenderer->presentPathFastEnabled() || orientation == 0);
 }
 
 SwapChain11::~SwapChain11()
diff --git a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
index b37ceaf..c0ce409 100644
--- a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
@@ -292,7 +292,7 @@
         case D3D_FEATURE_LEVEL_9_3:
         case D3D_FEATURE_LEVEL_9_2:
         case D3D_FEATURE_LEVEL_9_1:
-            return 2;  // dx_ViewAdjust and dx_ViewCoords
+            return 3;  // dx_ViewAdjust, dx_ViewCoords and dx_ViewScale
 
         default:
             UNREACHABLE();
@@ -313,7 +313,7 @@
         case D3D_FEATURE_LEVEL_9_3:
         case D3D_FEATURE_LEVEL_9_2:
         case D3D_FEATURE_LEVEL_9_1:
-            return 2;
+            return 3;
 
         default:
             UNREACHABLE();
@@ -1668,4 +1668,16 @@
     return TextureHelper11::MakeAndPossess3D(stagingTex);
 }
 
+bool UsePresentPathFast(const Renderer11 *renderer,
+                        const gl::FramebufferAttachment *framebufferAttachment)
+{
+    if (framebufferAttachment == nullptr)
+    {
+        return false;
+    }
+
+    return (framebufferAttachment->type() == GL_FRAMEBUFFER_DEFAULT &&
+            renderer->presentPathFastEnabled());
+}
+
 }  // namespace rx
diff --git a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.h b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.h
index d84fb8a..72389e4 100644
--- a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.h
+++ b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.h
@@ -25,6 +25,7 @@
 
 namespace rx
 {
+class Renderer11;
 class RenderTarget11;
 struct WorkaroundsD3D;
 struct Renderer11DeviceCaps;
@@ -377,6 +378,8 @@
                                                         const gl::Extents &size,
                                                         ID3D11Device *device);
 
+bool UsePresentPathFast(const Renderer11 *renderer, const gl::FramebufferAttachment *colorbuffer);
+
 }  // namespace rx
 
 #endif // LIBANGLE_RENDERER_D3D_D3D11_RENDERER11_UTILS_H_
diff --git a/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp b/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
index 1ba736e..71f0e42 100644
--- a/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
@@ -154,7 +154,8 @@
     swapChainDesc.Stereo = FALSE;
     swapChainDesc.SampleDesc.Count = 1;
     swapChainDesc.SampleDesc.Quality = 0;
-    swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_BACK_BUFFER;
+    swapChainDesc.BufferUsage =
+        DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_BACK_BUFFER;
     swapChainDesc.BufferCount = 2;
     swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
     swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
diff --git a/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp b/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
index 693b768..1ed3645 100644
--- a/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
@@ -257,7 +257,8 @@
     swapChainDesc.Stereo = FALSE;
     swapChainDesc.SampleDesc.Count = 1;
     swapChainDesc.SampleDesc.Quality = 0;
-    swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_BACK_BUFFER;
+    swapChainDesc.BufferUsage =
+        DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_BACK_BUFFER;
     swapChainDesc.BufferCount = 2;
     swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
     swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
diff --git a/src/libANGLE/renderer/d3d/d3d9/StateManager9.cpp b/src/libANGLE/renderer/d3d/d3d9/StateManager9.cpp
index 440a776..c4c600a 100644
--- a/src/libANGLE/renderer/d3d/d3d9/StateManager9.cpp
+++ b/src/libANGLE/renderer/d3d/d3d9/StateManager9.cpp
@@ -489,8 +489,8 @@
     mCurIgnoreViewport = ignoreViewport;
 
     // Setting shader constants
-    dx_VertexConstants vc = {};
-    dx_PixelConstants pc  = {};
+    dx_VertexConstants9 vc = {};
+    dx_PixelConstants9 pc  = {};
 
     vc.viewAdjust[0] =
         static_cast<float>((actualViewport.width - static_cast<int>(dxViewport.Width)) +
@@ -520,13 +520,13 @@
     pc.depthRange[1] = actualZFar;
     pc.depthRange[2] = actualZFar - actualZNear;
 
-    if (memcmp(&vc, &mVertexConstants, sizeof(dx_VertexConstants)) != 0)
+    if (memcmp(&vc, &mVertexConstants, sizeof(dx_VertexConstants9)) != 0)
     {
         mVertexConstants = vc;
         mDxUniformsDirty = true;
     }
 
-    if (memcmp(&pc, &mPixelConstants, sizeof(dx_PixelConstants)) != 0)
+    if (memcmp(&pc, &mPixelConstants, sizeof(dx_PixelConstants9)) != 0)
     {
         mPixelConstants  = pc;
         mDxUniformsDirty = true;
@@ -542,9 +542,9 @@
 
     IDirect3DDevice9 *device = mRenderer9->getDevice();
     device->SetVertexShaderConstantF(0, reinterpret_cast<float *>(&mVertexConstants),
-                                     sizeof(dx_VertexConstants) / sizeof(float[4]));
+                                     sizeof(dx_VertexConstants9) / sizeof(float[4]));
     device->SetPixelShaderConstantF(0, reinterpret_cast<float *>(&mPixelConstants),
-                                    sizeof(dx_PixelConstants) / sizeof(float[4]));
+                                    sizeof(dx_PixelConstants9) / sizeof(float[4]));
     mDxUniformsDirty = false;
 }
 
diff --git a/src/libANGLE/renderer/d3d/d3d9/StateManager9.h b/src/libANGLE/renderer/d3d/d3d9/StateManager9.h
index 6306855..d8c1eb9 100644
--- a/src/libANGLE/renderer/d3d/d3d9/StateManager9.h
+++ b/src/libANGLE/renderer/d3d/d3d9/StateManager9.h
@@ -19,6 +19,20 @@
 
 class Renderer9;
 
+struct dx_VertexConstants9
+{
+    float depthRange[4];
+    float viewAdjust[4];
+    float viewCoords[4];
+};
+
+struct dx_PixelConstants9
+{
+    float depthRange[4];
+    float viewCoords[4];
+    float depthFront[4];
+};
+
 class StateManager9 final : angle::NonCopyable
 {
   public:
@@ -175,8 +189,8 @@
     float mCurDepthFront;
     bool mCurIgnoreViewport;
 
-    dx_VertexConstants mVertexConstants;
-    dx_PixelConstants mPixelConstants;
+    dx_VertexConstants9 mVertexConstants;
+    dx_PixelConstants9 mPixelConstants;
     bool mDxUniformsDirty;
 
     // FIXME: Unsupported by D3D9