Remove uniform memory copy from GL front-end.

This moves the uniform query to the back-end. In D3D, this requires
a bit more redesign, especially for matrix uniforms.

Gives about a 10% speed improvement in the GL/NULL uniforms stress
test on Windows (UniformsBenchmark.Run/gl_null_400_vec4).

BUG=angleproject:1390

Change-Id: Idac22a77118e9e94d2f28c585e31ff0bc785ba94
Reviewed-on: https://chromium-review.googlesource.com/623929
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/libANGLE/Program.cpp b/src/libANGLE/Program.cpp
index b5b8b77..4916ecf 100644
--- a/src/libANGLE/Program.cpp
+++ b/src/libANGLE/Program.cpp
@@ -1333,127 +1333,145 @@
 
 void Program::setUniform1fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 1, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 1, v);
     mProgram->setUniform1fv(location, clampedCount, v);
 }
 
 void Program::setUniform2fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 2, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 2, v);
     mProgram->setUniform2fv(location, clampedCount, v);
 }
 
 void Program::setUniform3fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 3, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 3, v);
     mProgram->setUniform3fv(location, clampedCount, v);
 }
 
 void Program::setUniform4fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 4, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 4, v);
     mProgram->setUniform4fv(location, clampedCount, v);
 }
 
 void Program::setUniform1iv(GLint location, GLsizei count, const GLint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 1, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 1, v);
+
+    if (mState.isSamplerUniformIndex(locationInfo.index))
+    {
+        updateSamplerUniform(locationInfo, clampedCount, v);
+    }
+
     mProgram->setUniform1iv(location, clampedCount, v);
 }
 
 void Program::setUniform2iv(GLint location, GLsizei count, const GLint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 2, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 2, v);
     mProgram->setUniform2iv(location, clampedCount, v);
 }
 
 void Program::setUniform3iv(GLint location, GLsizei count, const GLint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 3, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 3, v);
     mProgram->setUniform3iv(location, clampedCount, v);
 }
 
 void Program::setUniform4iv(GLint location, GLsizei count, const GLint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 4, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 4, v);
     mProgram->setUniform4iv(location, clampedCount, v);
 }
 
 void Program::setUniform1uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 1, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 1, v);
     mProgram->setUniform1uiv(location, clampedCount, v);
 }
 
 void Program::setUniform2uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 2, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 2, v);
     mProgram->setUniform2uiv(location, clampedCount, v);
 }
 
 void Program::setUniform3uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 3, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 3, v);
     mProgram->setUniform3uiv(location, clampedCount, v);
 }
 
 void Program::setUniform4uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    GLsizei clampedCount = setUniformInternal(location, count, 4, v);
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+    GLsizei clampedCount                 = clampUniformCount(locationInfo, count, 4, v);
     mProgram->setUniform4uiv(location, clampedCount, v);
 }
 
 void Program::setUniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<2, 2>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<2, 2>(location, count, transpose, v);
     mProgram->setUniformMatrix2fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<3, 3>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<3, 3>(location, count, transpose, v);
     mProgram->setUniformMatrix3fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<4, 4>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<4, 4>(location, count, transpose, v);
     mProgram->setUniformMatrix4fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<2, 3>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<2, 3>(location, count, transpose, v);
     mProgram->setUniformMatrix2x3fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<2, 4>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<2, 4>(location, count, transpose, v);
     mProgram->setUniformMatrix2x4fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<3, 2>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<3, 2>(location, count, transpose, v);
     mProgram->setUniformMatrix3x2fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<3, 4>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<3, 4>(location, count, transpose, v);
     mProgram->setUniformMatrix3x4fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<4, 2>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<4, 2>(location, count, transpose, v);
     mProgram->setUniformMatrix4x2fv(location, clampedCount, transpose, v);
 }
 
 void Program::setUniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *v)
 {
-    GLsizei clampedCount = setMatrixUniformInternal<4, 3>(location, count, transpose, v);
+    GLsizei clampedCount = clampMatrixUniformCount<4, 3>(location, count, transpose, v);
     mProgram->setUniformMatrix4x3fv(location, clampedCount, transpose, v);
 }
 
@@ -2942,15 +2960,12 @@
     }
 }
 
-template <>
 void Program::updateSamplerUniform(const VariableLocation &locationInfo,
-                                   const uint8_t *destPointer,
                                    GLsizei clampedCount,
                                    const GLint *v)
 {
     // Invalidate the validation cache only if we modify the sampler data.
-    if (mState.isSamplerUniformIndex(locationInfo.index) &&
-        memcmp(destPointer, v, sizeof(GLint) * clampedCount) != 0)
+    if (mState.isSamplerUniformIndex(locationInfo.index))
     {
         GLuint samplerIndex = mState.getSamplerIndexFromUniformIndex(locationInfo.index);
         std::vector<GLuint> *boundTextureUnits =
@@ -2962,90 +2977,46 @@
 }
 
 template <typename T>
-void Program::updateSamplerUniform(const VariableLocation &locationInfo,
-                                   const uint8_t *destPointer,
-                                   GLsizei clampedCount,
+GLsizei Program::clampUniformCount(const VariableLocation &locationInfo,
+                                   GLsizei count,
+                                   int vectorSize,
                                    const T *v)
 {
-}
-
-template <typename T>
-GLsizei Program::setUniformInternal(GLint location, GLsizei countIn, int vectorSize, const T *v)
-{
-    const VariableLocation &locationInfo = mState.mUniformLocations[location];
-    LinkedUniform *linkedUniform         = &mState.mUniforms[locationInfo.index];
-    uint8_t *destPointer                 = linkedUniform->getDataPtrToElement(locationInfo.element);
+    const LinkedUniform &linkedUniform = mState.mUniforms[locationInfo.index];
 
     // OpenGL ES 3.0.4 spec pg 67: "Values for any array element that exceeds the highest array
     // element index used, as reported by GetActiveUniform, will be ignored by the GL."
-    unsigned int remainingElements = linkedUniform->elementCount() - locationInfo.element;
+    unsigned int remainingElements = linkedUniform.elementCount() - locationInfo.element;
     GLsizei maxElementCount =
-        static_cast<GLsizei>(remainingElements * linkedUniform->getElementComponents());
+        static_cast<GLsizei>(remainingElements * linkedUniform.getElementComponents());
 
-    GLsizei count        = countIn;
-    GLsizei clampedCount = count * vectorSize;
-    if (clampedCount > maxElementCount)
+    if (count * vectorSize > maxElementCount)
     {
-        clampedCount = maxElementCount;
-        count        = maxElementCount / vectorSize;
-    }
-
-    // VariableComponentType(linkedUniform->type) has a dozens of compares and thus is evil for
-    // inlining with regards to code size. This version is one subtract and one compare only.
-    if (IsVariableComponentTypeBool(linkedUniform->type))
-    {
-        // Do a cast conversion for boolean types. From the spec:
-        // "The uniform is set to FALSE if the input value is 0 or 0.0f, and set to TRUE otherwise."
-        GLint *destAsInt = reinterpret_cast<GLint *>(destPointer);
-        for (GLsizei component = 0; component < clampedCount; ++component)
-        {
-            destAsInt[component] = (v[component] != static_cast<T>(0) ? GL_TRUE : GL_FALSE);
-        }
-    }
-    else
-    {
-        updateSamplerUniform(locationInfo, destPointer, clampedCount, v);
-        memcpy(destPointer, v, sizeof(T) * clampedCount);
+        return maxElementCount / vectorSize;
     }
 
     return count;
 }
 
 template <size_t cols, size_t rows, typename T>
-GLsizei Program::setMatrixUniformInternal(GLint location,
-                                          GLsizei count,
-                                          GLboolean transpose,
-                                          const T *v)
+GLsizei Program::clampMatrixUniformCount(GLint location,
+                                         GLsizei count,
+                                         GLboolean transpose,
+                                         const T *v)
 {
+    const VariableLocation &locationInfo = mState.mUniformLocations[location];
+
     if (!transpose)
     {
-        return setUniformInternal(location, count, cols * rows, v);
+        return clampUniformCount(locationInfo, count, cols * rows, v);
     }
 
-    // Perform a transposing copy.
-    const VariableLocation &locationInfo = mState.mUniformLocations[location];
-    LinkedUniform *linkedUniform         = &mState.mUniforms[locationInfo.index];
-    T *destPtr = reinterpret_cast<T *>(linkedUniform->getDataPtrToElement(locationInfo.element));
+    const LinkedUniform &linkedUniform = mState.mUniforms[locationInfo.index];
 
     // OpenGL ES 3.0.4 spec pg 67: "Values for any array element that exceeds the highest array
     // element index used, as reported by GetActiveUniform, will be ignored by the GL."
-    unsigned int remainingElements = linkedUniform->elementCount() - locationInfo.element;
-    GLsizei clampedCount           = std::min(count, static_cast<GLsizei>(remainingElements));
-
-    for (GLsizei element = 0; element < clampedCount; ++element)
-    {
-        size_t elementOffset = element * rows * cols;
-
-        for (size_t row = 0; row < rows; ++row)
-        {
-            for (size_t col = 0; col < cols; ++col)
-            {
-                destPtr[col * rows + row + elementOffset] = v[row * cols + col + elementOffset];
-            }
-        }
-    }
-
-    return clampedCount;
+    unsigned int remainingElements = linkedUniform.elementCount() - locationInfo.element;
+    return std::min(count, static_cast<GLsizei>(remainingElements));
 }
 
 // Driver differences mean that doing the uniform value cast ourselves gives consistent results.
diff --git a/src/libANGLE/Program.h b/src/libANGLE/Program.h
index bc15ff8..e3e20bd 100644
--- a/src/libANGLE/Program.h
+++ b/src/libANGLE/Program.h
@@ -618,17 +618,16 @@
     // Both these function update the cached uniform values and return a modified "count"
     // so that the uniform update doesn't overflow the uniform.
     template <typename T>
-    GLsizei setUniformInternal(GLint location, GLsizei count, int vectorSize, const T *v);
-    template <size_t cols, size_t rows, typename T>
-    GLsizei setMatrixUniformInternal(GLint location,
-                                     GLsizei count,
-                                     GLboolean transpose,
-                                     const T *v);
-    template <typename T>
-    void updateSamplerUniform(const VariableLocation &locationInfo,
-                              const uint8_t *destPointer,
-                              GLsizei clampedCount,
+    GLsizei clampUniformCount(const VariableLocation &locationInfo,
+                              GLsizei count,
+                              int vectorSize,
                               const T *v);
+    template <size_t cols, size_t rows, typename T>
+    GLsizei clampMatrixUniformCount(GLint location, GLsizei count, GLboolean transpose, const T *v);
+
+    void updateSamplerUniform(const VariableLocation &locationInfo,
+                              GLsizei clampedCount,
+                              const GLint *v);
 
     template <typename DestT>
     void getUniformInternal(const Context *context,
diff --git a/src/libANGLE/Uniform.cpp b/src/libANGLE/Uniform.cpp
index e5344f5..f98bb32 100644
--- a/src/libANGLE/Uniform.cpp
+++ b/src/libANGLE/Uniform.cpp
@@ -46,15 +46,10 @@
 LinkedUniform::LinkedUniform(const LinkedUniform &uniform)
     : sh::Uniform(uniform), bufferIndex(uniform.bufferIndex), blockInfo(uniform.blockInfo)
 {
-    // This function is not intended to be called during runtime.
-    ASSERT(uniform.mLazyData.empty());
 }
 
 LinkedUniform &LinkedUniform::operator=(const LinkedUniform &uniform)
 {
-    // This function is not intended to be called during runtime.
-    ASSERT(uniform.mLazyData.empty());
-
     sh::Uniform::operator=(uniform);
     bufferIndex          = uniform.bufferIndex;
     blockInfo            = uniform.blockInfo;
@@ -71,23 +66,6 @@
     return bufferIndex == -1;
 }
 
-size_t LinkedUniform::dataSize() const
-{
-    ASSERT(type != GL_STRUCT_ANGLEX);
-    if (mLazyData.empty())
-    {
-        mLazyData.resize(VariableExternalSize(type) * elementCount());
-        ASSERT(!mLazyData.empty());
-    }
-
-    return mLazyData.size();
-}
-
-const uint8_t *LinkedUniform::data() const
-{
-    return const_cast<LinkedUniform *>(this)->data();
-}
-
 bool LinkedUniform::isSampler() const
 {
     return IsSamplerType(type);
@@ -118,17 +96,6 @@
     return VariableComponentCount(type);
 }
 
-uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex)
-{
-    ASSERT((!isArray() && elementIndex == 0) || (isArray() && elementIndex < arraySize));
-    return data() + (elementIndex > 0 ? (getElementSize() * elementIndex) : 0u);
-}
-
-const uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex) const
-{
-    return const_cast<LinkedUniform *>(this)->getDataPtrToElement(elementIndex);
-}
-
 ShaderVariableBuffer::ShaderVariableBuffer()
     : binding(0),
       dataSize(0),
diff --git a/src/libANGLE/Uniform.h b/src/libANGLE/Uniform.h
index 65d956a..05ad0f5 100644
--- a/src/libANGLE/Uniform.h
+++ b/src/libANGLE/Uniform.h
@@ -37,19 +37,6 @@
     LinkedUniform &operator=(const LinkedUniform &uniform);
     ~LinkedUniform();
 
-    size_t dataSize() const;
-    uint8_t *data()
-    {
-        if (mLazyData.empty())
-        {
-            // dataSize() will init the data store.
-            size_t size = dataSize();
-            memset(mLazyData.data(), 0, size);
-        }
-
-        return mLazyData.data();
-    }
-    const uint8_t *data() const;
     bool isSampler() const;
     bool isImage() const;
     bool isAtomicCounter() const;
@@ -57,15 +44,10 @@
     bool isField() const;
     size_t getElementSize() const;
     size_t getElementComponents() const;
-    uint8_t *getDataPtrToElement(size_t elementIndex);
-    const uint8_t *getDataPtrToElement(size_t elementIndex) const;
 
     // Identifies the containing buffer backed resource -- interface block or atomic counter buffer.
     int bufferIndex;
     sh::BlockMemberInfo blockInfo;
-
-  private:
-    mutable angle::MemoryBuffer mLazyData;
 };
 
 // Parent struct for atomic counter, uniform block, and shader storage block buffer, which all
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
index 61f90ff..0d571d3 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
@@ -146,16 +146,6 @@
     }
 }
 
-template <typename T>
-static inline void SetIfDirty(T *dest, const T &source, bool *dirtyFlag)
-{
-    ASSERT(dest != nullptr);
-    ASSERT(dirtyFlag != nullptr);
-
-    *dirtyFlag = *dirtyFlag || (memcmp(dest, &source, sizeof(T)) != 0);
-    *dest      = source;
-}
-
 template <typename T, int cols, int rows>
 bool TransposeExpandMatrix(T *target, const GLfloat *value)
 {
@@ -259,6 +249,26 @@
     return false;
 }
 
+// Helper method to de-transpose a matrix uniform for an API query.
+void GetMatrixUniform(GLint columns, GLint rows, GLfloat *dataOut, const GLfloat *source)
+{
+    for (GLint col = 0; col < columns; ++col)
+    {
+        for (GLint row = 0; row < rows; ++row)
+        {
+            GLfloat *outptr      = dataOut + ((col * rows) + row);
+            const GLfloat *inptr = source + ((row * 4) + col);
+            *outptr              = *inptr;
+        }
+    }
+}
+
+template <typename NonFloatT>
+void GetMatrixUniform(GLint columns, GLint rows, NonFloatT *dataOut, const NonFloatT *source)
+{
+    UNREACHABLE();
+}
+
 }  // anonymous namespace
 
 // D3DUniform Implementation
@@ -270,7 +280,9 @@
     : type(typeIn),
       name(nameIn),
       arraySize(arraySizeIn),
-      data(nullptr),
+      vsData(nullptr),
+      psData(nullptr),
+      csData(nullptr),
       dirty(true),
       vsRegisterIndex(GL_INVALID_INDEX),
       psRegisterIndex(GL_INVALID_INDEX),
@@ -283,10 +295,6 @@
     // Uniform blocks/buffers are treated separately by the Renderer (ES3 path only)
     if (defaultBlock)
     {
-        size_t bytes = gl::VariableInternalSize(type) * elementCount();
-        data = new uint8_t[bytes];
-        memset(data, 0, bytes);
-
         // Use the row count as register count, will work for non-square matrices.
         registerCount = gl::VariableRowCount(type) * elementCount();
     }
@@ -294,7 +302,19 @@
 
 D3DUniform::~D3DUniform()
 {
-    SafeDeleteArray(data);
+}
+
+const uint8_t *D3DUniform::getDataPtrToElement(size_t elementIndex) const
+{
+    ASSERT((arraySize == 0 && elementIndex == 0) || (arraySize > 0 && elementIndex < arraySize));
+
+    if (isSampler())
+    {
+        return reinterpret_cast<const uint8_t *>(&mSamplerData[elementIndex]);
+    }
+
+    return firstNonNullData() +
+           (elementIndex > 0 ? (gl::VariableInternalSize(type) * elementIndex) : 0u);
 }
 
 bool D3DUniform::isSampler() const
@@ -317,6 +337,18 @@
     return csRegisterIndex != GL_INVALID_INDEX;
 }
 
+const uint8_t *D3DUniform::firstNonNullData() const
+{
+    ASSERT(vsData || psData || csData || !mSamplerData.empty());
+
+    if (!mSamplerData.empty())
+    {
+        return reinterpret_cast<const uint8_t *>(mSamplerData.data());
+    }
+
+    return vsData ? vsData : (psData ? psData : csData);
+}
+
 // D3DVarying Implementation
 
 D3DVarying::D3DVarying() : semanticIndex(0), componentCount(0), outputSlot(0)
@@ -687,7 +719,6 @@
             continue;
 
         int count = d3dUniform->elementCount();
-        const GLint(*v)[4] = reinterpret_cast<const GLint(*)[4]>(d3dUniform->data);
 
         if (d3dUniform->isReferencedByFragmentShader())
         {
@@ -700,7 +731,7 @@
                 if (samplerIndex < mSamplersPS.size())
                 {
                     ASSERT(mSamplersPS[samplerIndex].active);
-                    mSamplersPS[samplerIndex].logicalTextureUnit = v[i][0];
+                    mSamplersPS[samplerIndex].logicalTextureUnit = d3dUniform->mSamplerData[i];
                 }
             }
         }
@@ -716,7 +747,7 @@
                 if (samplerIndex < mSamplersVS.size())
                 {
                     ASSERT(mSamplersVS[samplerIndex].active);
-                    mSamplersVS[samplerIndex].logicalTextureUnit = v[i][0];
+                    mSamplersVS[samplerIndex].logicalTextureUnit = d3dUniform->mSamplerData[i];
                 }
             }
         }
@@ -732,7 +763,7 @@
                 if (samplerIndex < mSamplersCS.size())
                 {
                     ASSERT(mSamplersCS[samplerIndex].active);
-                    mSamplersCS[samplerIndex].logicalTextureUnit = v[i][0];
+                    mSamplersCS[samplerIndex].logicalTextureUnit = d3dUniform->mSamplerData[i];
                 }
             }
         }
@@ -1725,6 +1756,34 @@
         mRenderer->createUniformStorage(fragmentRegisters * 16u));
     mComputeUniformStorage =
         std::unique_ptr<UniformStorageD3D>(mRenderer->createUniformStorage(computeRegisters * 16u));
+
+    // Iterate the uniforms again to assign data pointers to default block uniforms.
+    for (D3DUniform *d3dUniform : mD3DUniforms)
+    {
+        if (d3dUniform->isSampler())
+        {
+            d3dUniform->mSamplerData.resize(d3dUniform->elementCount(), 0);
+            continue;
+        }
+
+        if (d3dUniform->isReferencedByVertexShader())
+        {
+            d3dUniform->vsData = mVertexUniformStorage->getDataPointer(d3dUniform->vsRegisterIndex,
+                                                                       d3dUniform->registerElement);
+        }
+
+        if (d3dUniform->isReferencedByFragmentShader())
+        {
+            d3dUniform->psData = mFragmentUniformStorage->getDataPointer(
+                d3dUniform->psRegisterIndex, d3dUniform->registerElement);
+        }
+
+        if (d3dUniform->isReferencedByComputeShader())
+        {
+            d3dUniform->csData = mComputeUniformStorage->getDataPointer(
+                d3dUniform->csRegisterIndex, d3dUniform->registerElement);
+        }
+    }
 }
 
 gl::Error ProgramD3D::applyUniforms(GLenum drawMode)
@@ -1823,22 +1882,22 @@
 
 void ProgramD3D::setUniform1fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    setUniform(location, count, v, GL_FLOAT);
+    setUniformInternal(location, count, v, GL_FLOAT);
 }
 
 void ProgramD3D::setUniform2fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    setUniform(location, count, v, GL_FLOAT_VEC2);
+    setUniformInternal(location, count, v, GL_FLOAT_VEC2);
 }
 
 void ProgramD3D::setUniform3fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    setUniform(location, count, v, GL_FLOAT_VEC3);
+    setUniformInternal(location, count, v, GL_FLOAT_VEC3);
 }
 
 void ProgramD3D::setUniform4fv(GLint location, GLsizei count, const GLfloat *v)
 {
-    setUniform(location, count, v, GL_FLOAT_VEC4);
+    setUniformInternal(location, count, v, GL_FLOAT_VEC4);
 }
 
 void ProgramD3D::setUniformMatrix2fv(GLint location,
@@ -1846,7 +1905,7 @@
                                      GLboolean transpose,
                                      const GLfloat *value)
 {
-    setUniformMatrixfv<2, 2>(location, count, transpose, value, GL_FLOAT_MAT2);
+    setUniformMatrixfvInternal<2, 2>(location, count, transpose, value, GL_FLOAT_MAT2);
 }
 
 void ProgramD3D::setUniformMatrix3fv(GLint location,
@@ -1854,7 +1913,7 @@
                                      GLboolean transpose,
                                      const GLfloat *value)
 {
-    setUniformMatrixfv<3, 3>(location, count, transpose, value, GL_FLOAT_MAT3);
+    setUniformMatrixfvInternal<3, 3>(location, count, transpose, value, GL_FLOAT_MAT3);
 }
 
 void ProgramD3D::setUniformMatrix4fv(GLint location,
@@ -1862,7 +1921,7 @@
                                      GLboolean transpose,
                                      const GLfloat *value)
 {
-    setUniformMatrixfv<4, 4>(location, count, transpose, value, GL_FLOAT_MAT4);
+    setUniformMatrixfvInternal<4, 4>(location, count, transpose, value, GL_FLOAT_MAT4);
 }
 
 void ProgramD3D::setUniformMatrix2x3fv(GLint location,
@@ -1870,7 +1929,7 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<2, 3>(location, count, transpose, value, GL_FLOAT_MAT2x3);
+    setUniformMatrixfvInternal<2, 3>(location, count, transpose, value, GL_FLOAT_MAT2x3);
 }
 
 void ProgramD3D::setUniformMatrix3x2fv(GLint location,
@@ -1878,7 +1937,7 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<3, 2>(location, count, transpose, value, GL_FLOAT_MAT3x2);
+    setUniformMatrixfvInternal<3, 2>(location, count, transpose, value, GL_FLOAT_MAT3x2);
 }
 
 void ProgramD3D::setUniformMatrix2x4fv(GLint location,
@@ -1886,7 +1945,7 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<2, 4>(location, count, transpose, value, GL_FLOAT_MAT2x4);
+    setUniformMatrixfvInternal<2, 4>(location, count, transpose, value, GL_FLOAT_MAT2x4);
 }
 
 void ProgramD3D::setUniformMatrix4x2fv(GLint location,
@@ -1894,7 +1953,7 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<4, 2>(location, count, transpose, value, GL_FLOAT_MAT4x2);
+    setUniformMatrixfvInternal<4, 2>(location, count, transpose, value, GL_FLOAT_MAT4x2);
 }
 
 void ProgramD3D::setUniformMatrix3x4fv(GLint location,
@@ -1902,7 +1961,7 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<3, 4>(location, count, transpose, value, GL_FLOAT_MAT3x4);
+    setUniformMatrixfvInternal<3, 4>(location, count, transpose, value, GL_FLOAT_MAT3x4);
 }
 
 void ProgramD3D::setUniformMatrix4x3fv(GLint location,
@@ -1910,47 +1969,47 @@
                                        GLboolean transpose,
                                        const GLfloat *value)
 {
-    setUniformMatrixfv<4, 3>(location, count, transpose, value, GL_FLOAT_MAT4x3);
+    setUniformMatrixfvInternal<4, 3>(location, count, transpose, value, GL_FLOAT_MAT4x3);
 }
 
 void ProgramD3D::setUniform1iv(GLint location, GLsizei count, const GLint *v)
 {
-    setUniform(location, count, v, GL_INT);
+    setUniformInternal(location, count, v, GL_INT);
 }
 
 void ProgramD3D::setUniform2iv(GLint location, GLsizei count, const GLint *v)
 {
-    setUniform(location, count, v, GL_INT_VEC2);
+    setUniformInternal(location, count, v, GL_INT_VEC2);
 }
 
 void ProgramD3D::setUniform3iv(GLint location, GLsizei count, const GLint *v)
 {
-    setUniform(location, count, v, GL_INT_VEC3);
+    setUniformInternal(location, count, v, GL_INT_VEC3);
 }
 
 void ProgramD3D::setUniform4iv(GLint location, GLsizei count, const GLint *v)
 {
-    setUniform(location, count, v, GL_INT_VEC4);
+    setUniformInternal(location, count, v, GL_INT_VEC4);
 }
 
 void ProgramD3D::setUniform1uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    setUniform(location, count, v, GL_UNSIGNED_INT);
+    setUniformInternal(location, count, v, GL_UNSIGNED_INT);
 }
 
 void ProgramD3D::setUniform2uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    setUniform(location, count, v, GL_UNSIGNED_INT_VEC2);
+    setUniformInternal(location, count, v, GL_UNSIGNED_INT_VEC2);
 }
 
 void ProgramD3D::setUniform3uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    setUniform(location, count, v, GL_UNSIGNED_INT_VEC3);
+    setUniformInternal(location, count, v, GL_UNSIGNED_INT_VEC3);
 }
 
 void ProgramD3D::setUniform4uiv(GLint location, GLsizei count, const GLuint *v)
 {
-    setUniform(location, count, v, GL_UNSIGNED_INT_VEC4);
+    setUniformInternal(location, count, v, GL_UNSIGNED_INT_VEC4);
 }
 
 void ProgramD3D::setUniformBlockBinding(GLuint /*uniformBlockIndex*/,
@@ -2114,15 +2173,18 @@
         if (shaderType == GL_FRAGMENT_SHADER)
         {
             d3dUniform->psRegisterIndex = reg;
+            d3dUniform->dirty           = true;
         }
         else if (shaderType == GL_VERTEX_SHADER)
         {
             d3dUniform->vsRegisterIndex = reg;
+            d3dUniform->dirty           = true;
         }
         else
         {
             ASSERT(shaderType == GL_COMPUTE_SHADER);
             d3dUniform->csRegisterIndex = reg;
+            d3dUniform->dirty           = true;
         }
 
         // Arrays are treated as aggregate types
@@ -2134,39 +2196,36 @@
 }
 
 template <typename T>
-void ProgramD3D::setUniform(GLint location, GLsizei countIn, const T *v, GLenum targetUniformType)
+void ProgramD3D::setUniformImpl(const gl::VariableLocation &locationInfo,
+                                GLsizei countIn,
+                                const T *v,
+                                uint8_t *targetData,
+                                GLenum targetUniformType)
 {
     const int components        = gl::VariableComponentCount(targetUniformType);
     const GLenum targetBoolType = gl::VariableBoolVectorType(targetUniformType);
 
-    D3DUniform *targetUniform = getD3DUniformFromLocation(location);
+    D3DUniform *targetUniform = mD3DUniforms[locationInfo.index];
 
     unsigned int elementCount = targetUniform->elementCount();
-    unsigned int arrayElement = mState.getUniformLocations()[location].element;
+    unsigned int arrayElement = locationInfo.element;
     unsigned int count        = std::min(elementCount - arrayElement, static_cast<unsigned int>(countIn));
 
     if (targetUniform->type == targetUniformType)
     {
-        T *target = reinterpret_cast<T *>(targetUniform->data) + arrayElement * 4;
+        T *target = reinterpret_cast<T *>(targetData) + arrayElement * 4;
 
         for (unsigned int i = 0; i < count; i++)
         {
             T *dest         = target + (i * 4);
             const T *source = v + (i * components);
-
-            for (int c = 0; c < components; c++)
-            {
-                SetIfDirty(dest + c, source[c], &targetUniform->dirty);
-            }
-            for (int c = components; c < 4; c++)
-            {
-                SetIfDirty(dest + c, T(0), &targetUniform->dirty);
-            }
+            memcpy(dest, source, components * sizeof(T));
+            targetUniform->dirty = true;
         }
     }
     else if (targetUniform->type == targetBoolType)
     {
-        GLint *boolParams = reinterpret_cast<GLint *>(targetUniform->data) + arrayElement * 4;
+        GLint *boolParams = reinterpret_cast<GLint *>(targetData) + arrayElement * 4;
 
         for (unsigned int i = 0; i < count; i++)
         {
@@ -2175,50 +2234,57 @@
 
             for (int c = 0; c < components; c++)
             {
-                SetIfDirty(dest + c, (source[c] == static_cast<T>(0)) ? GL_FALSE : GL_TRUE,
-                           &targetUniform->dirty);
+                dest[c] = (source[c] == static_cast<T>(0)) ? GL_FALSE : GL_TRUE;
             }
-            for (int c = components; c < 4; c++)
-            {
-                SetIfDirty(dest + c, GL_FALSE, &targetUniform->dirty);
-            }
-        }
-    }
-    else if (targetUniform->isSampler())
-    {
-        ASSERT(targetUniformType == GL_INT);
-
-        GLint *target = reinterpret_cast<GLint *>(targetUniform->data) + arrayElement * 4;
-
-        bool wasDirty = targetUniform->dirty;
-
-        for (unsigned int i = 0; i < count; i++)
-        {
-            GLint *dest         = target + (i * 4);
-            const GLint *source = reinterpret_cast<const GLint *>(v) + (i * components);
-
-            SetIfDirty(dest + 0, source[0], &targetUniform->dirty);
-            SetIfDirty(dest + 1, 0, &targetUniform->dirty);
-            SetIfDirty(dest + 2, 0, &targetUniform->dirty);
-            SetIfDirty(dest + 3, 0, &targetUniform->dirty);
-        }
-
-        if (!wasDirty && targetUniform->dirty)
-        {
-            mDirtySamplerMapping = true;
+            targetUniform->dirty = true;
         }
     }
     else
         UNREACHABLE();
 }
 
-template <int cols, int rows>
-void ProgramD3D::setUniformMatrixfv(GLint location,
-                                    GLsizei countIn,
-                                    GLboolean transpose,
-                                    const GLfloat *value,
+template <typename T>
+void ProgramD3D::setUniformInternal(GLint location,
+                                    GLsizei count,
+                                    const T *v,
                                     GLenum targetUniformType)
 {
+    const gl::VariableLocation &locationInfo = mState.getUniformLocations()[location];
+    D3DUniform *targetUniform                = mD3DUniforms[locationInfo.index];
+
+    if (!targetUniform->mSamplerData.empty())
+    {
+        ASSERT(targetUniformType == GL_INT);
+        memcpy(&targetUniform->mSamplerData[locationInfo.element], v, count * sizeof(T));
+        mDirtySamplerMapping = true;
+        targetUniform->dirty = true;
+        return;
+    }
+
+    if (targetUniform->vsData)
+    {
+        setUniformImpl(locationInfo, count, v, targetUniform->vsData, targetUniformType);
+    }
+
+    if (targetUniform->psData)
+    {
+        setUniformImpl(locationInfo, count, v, targetUniform->psData, targetUniformType);
+    }
+
+    if (targetUniform->csData)
+    {
+        setUniformImpl(locationInfo, count, v, targetUniform->csData, targetUniformType);
+    }
+}
+
+template <int cols, int rows>
+void ProgramD3D::setUniformMatrixfvImpl(GLint location,
+                                        GLsizei countIn,
+                                        GLboolean transpose,
+                                        const GLfloat *value,
+                                        uint8_t *targetData,
+                                        GLenum targetUniformType)
+{
     D3DUniform *targetUniform = getD3DUniformFromLocation(location);
 
     unsigned int elementCount = targetUniform->elementCount();
@@ -2226,8 +2292,8 @@
     unsigned int count        = std::min(elementCount - arrayElement, static_cast<unsigned int>(countIn));
 
     const unsigned int targetMatrixStride = (4 * rows);
-    GLfloat *target =
-        (GLfloat *)(targetUniform->data + arrayElement * sizeof(GLfloat) * targetMatrixStride);
+    GLfloat *target = reinterpret_cast<GLfloat *>(targetData + arrayElement * sizeof(GLfloat) *
+                                                                   targetMatrixStride);
 
     for (unsigned int i = 0; i < count; i++)
     {
@@ -2247,6 +2313,34 @@
     }
 }
 
+template <int cols, int rows>
+void ProgramD3D::setUniformMatrixfvInternal(GLint location,
+                                            GLsizei countIn,
+                                            GLboolean transpose,
+                                            const GLfloat *value,
+                                            GLenum targetUniformType)
+{
+    D3DUniform *targetUniform = getD3DUniformFromLocation(location);
+
+    if (targetUniform->vsData)
+    {
+        setUniformMatrixfvImpl<cols, rows>(location, countIn, transpose, value,
+                                           targetUniform->vsData, targetUniformType);
+    }
+
+    if (targetUniform->psData)
+    {
+        setUniformMatrixfvImpl<cols, rows>(location, countIn, transpose, value,
+                                           targetUniform->psData, targetUniformType);
+    }
+
+    if (targetUniform->csData)
+    {
+        setUniformMatrixfvImpl<cols, rows>(location, countIn, transpose, value,
+                                           targetUniform->csData, targetUniformType);
+    }
+}
+
 size_t ProgramD3D::getUniformBlockInfo(const sh::InterfaceBlock &interfaceBlock)
 {
     ASSERT(interfaceBlock.staticUse || interfaceBlock.layout != sh::BLOCKLAYOUT_PACKED);
@@ -2525,6 +2619,11 @@
     return mD3DUniforms[mState.getUniformLocations()[location].index];
 }
 
+const D3DUniform *ProgramD3D::getD3DUniformFromLocation(GLint location) const
+{
+    return mD3DUniforms[mState.getUniformLocations()[location].index];
+}
+
 bool ProgramD3D::getUniformBlockSize(const std::string &blockName,
                                      const std::string & /* blockMappedName */,
                                      size_t *sizeOut) const
@@ -2612,8 +2711,18 @@
     const gl::VariableLocation &locationInfo = mState.getUniformLocations()[location];
     const gl::LinkedUniform &uniform         = mState.getUniforms()[locationInfo.index];
 
-    const uint8_t *srcPointer = uniform.getDataPtrToElement(locationInfo.element);
-    memcpy(dataOut, srcPointer, uniform.getElementSize());
+    const D3DUniform *targetUniform = getD3DUniformFromLocation(location);
+    const uint8_t *srcPointer       = targetUniform->getDataPtrToElement(locationInfo.element);
+
+    if (gl::IsMatrixType(uniform.type))
+    {
+        GetMatrixUniform(gl::VariableColumnCount(uniform.type), gl::VariableRowCount(uniform.type),
+                         dataOut, reinterpret_cast<const DestT *>(srcPointer));
+    }
+    else
+    {
+        memcpy(dataOut, srcPointer, uniform.getElementSize());
+    }
 }
 
 void ProgramD3D::getUniformfv(const gl::Context *context, GLint location, GLfloat *params) const
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.h b/src/libANGLE/renderer/d3d/ProgramD3D.h
index 2d1f4a6..8968a4d 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.h
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.h
@@ -32,6 +32,8 @@
 #endif
 
 // Helper struct representing a single shader uniform
+// TODO(jmadill): Make uniform blocks shared between all programs, so we don't need separate
+// register indices.
 struct D3DUniform : private angle::NonCopyable
 {
     D3DUniform(GLenum typeIn,
@@ -46,14 +48,18 @@
     bool isReferencedByFragmentShader() const;
     bool isReferencedByComputeShader() const;
 
+    const uint8_t *firstNonNullData() const;
+    const uint8_t *getDataPtrToElement(size_t elementIndex) const;
+
     // Duplicated from the GL layer
     GLenum type;
     std::string name;
     unsigned int arraySize;
 
-    // Pointer to a system copy of the data.
-    // TODO(jmadill): remove this in favor of gl::LinkedUniform::data().
-    uint8_t *data;
+    // Pointers to system copies of the data. Separate pointers for each uniform storage type.
+    uint8_t *vsData;
+    uint8_t *psData;
+    uint8_t *csData;
 
     // Has the data been updated since the last sync?
     bool dirty;
@@ -68,6 +74,9 @@
     // uniforms
     // inside aggregate types, which are packed according C-like structure rules.
     unsigned int registerElement;
+
+    // Special buffer for sampler values.
+    std::vector<GLint> mSamplerData;
 };
 
 struct D3DUniformBlock
@@ -371,14 +380,29 @@
     void getUniformInternal(GLint location, DestT *dataOut) const;
 
     template <typename T>
-    void setUniform(GLint location, GLsizei count, const T *v, GLenum targetUniformType);
+    void setUniformImpl(const gl::VariableLocation &locationInfo,
+                        GLsizei count,
+                        const T *v,
+                        uint8_t *targetData,
+                        GLenum targetUniformType);
+
+    template <typename T>
+    void setUniformInternal(GLint location, GLsizei count, const T *v, GLenum targetUniformType);
 
     template <int cols, int rows>
-    void setUniformMatrixfv(GLint location,
-                            GLsizei count,
-                            GLboolean transpose,
-                            const GLfloat *value,
-                            GLenum targetUniformType);
+    void setUniformMatrixfvImpl(GLint location,
+                                GLsizei count,
+                                GLboolean transpose,
+                                const GLfloat *value,
+                                uint8_t *targetData,
+                                GLenum targetUniformType);
+
+    template <int cols, int rows>
+    void setUniformMatrixfvInternal(GLint location,
+                                    GLsizei count,
+                                    GLboolean transpose,
+                                    const GLfloat *value,
+                                    GLenum targetUniformType);
 
     gl::LinkResult compileProgramExecutables(const gl::Context *context, gl::InfoLog &infoLog);
     gl::LinkResult compileComputeExecutable(const gl::Context *context, gl::InfoLog &infoLog);
@@ -387,6 +411,7 @@
                                          const BuiltinInfo &builtins);
     D3DUniform *getD3DUniformByName(const std::string &name);
     D3DUniform *getD3DUniformFromLocation(GLint location);
+    const D3DUniform *getD3DUniformFromLocation(GLint location) const;
 
     void initAttribLocationsToD3DSemantic(const gl::Context *context);
 
diff --git a/src/libANGLE/renderer/d3d/ShaderExecutableD3D.cpp b/src/libANGLE/renderer/d3d/ShaderExecutableD3D.cpp
index 97ffdf5..83a66bd 100644
--- a/src/libANGLE/renderer/d3d/ShaderExecutableD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ShaderExecutableD3D.cpp
@@ -44,9 +44,13 @@
     mDebugInfo += info;
 }
 
-
-UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mSize(initialSize)
+UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mUniformData()
 {
+    bool result = mUniformData.resize(initialSize);
+    ASSERT(result);
+
+    // Uniform data is zero-initialized by default.
+    mUniformData.fill(0);
 }
 
 UniformStorageD3D::~UniformStorageD3D()
@@ -55,7 +59,13 @@
 
 size_t UniformStorageD3D::size() const
 {
-    return mSize;
+    return mUniformData.size();
 }
 
+uint8_t *UniformStorageD3D::getDataPointer(unsigned int registerIndex, unsigned int registerElement)
+{
+    size_t offset = ((registerIndex * 4 + registerElement) * sizeof(float));
+    return mUniformData.data() + offset;
 }
+
+}  // namespace rx
diff --git a/src/libANGLE/renderer/d3d/ShaderExecutableD3D.h b/src/libANGLE/renderer/d3d/ShaderExecutableD3D.h
index 71b83b7..b809771 100644
--- a/src/libANGLE/renderer/d3d/ShaderExecutableD3D.h
+++ b/src/libANGLE/renderer/d3d/ShaderExecutableD3D.h
@@ -10,6 +10,7 @@
 #ifndef LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
 #define LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
 
+#include "common/MemoryBuffer.h"
 #include "common/debug.h"
 
 #include <vector>
@@ -45,10 +46,12 @@
 
     size_t size() const;
 
+    uint8_t *getDataPointer(unsigned int registerIndex, unsigned int registerElement);
+
   private:
-    size_t mSize;
+    angle::MemoryBuffer mUniformData;
 };
 
-}
+}  // namespace rx
 
 #endif // LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
diff --git a/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp b/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
index c0e8a4a..fe97e80 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
@@ -2171,16 +2171,14 @@
     const d3d11::Buffer *pixelConstantBuffer = nullptr;
     ANGLE_TRY(fragmentUniformStorage->getConstantBuffer(this, &pixelConstantBuffer));
 
-    float(*mapVS)[4] = nullptr;
-    float(*mapPS)[4] = nullptr;
-
     if (totalRegisterCountVS > 0 && vertexUniformsDirty)
     {
         D3D11_MAPPED_SUBRESOURCE map = {0};
         HRESULT result =
             mDeviceContext->Map(vertexConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
         ASSERT(SUCCEEDED(result));
-        mapVS = (float(*)[4])map.pData;
+        memcpy(map.pData, vertexUniformStorage->getDataPointer(0, 0), vertexUniformStorage->size());
+        mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
     }
 
     if (totalRegisterCountPS > 0 && pixelUniformsDirty)
@@ -2189,39 +2187,8 @@
         HRESULT result =
             mDeviceContext->Map(pixelConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
         ASSERT(SUCCEEDED(result));
-        mapPS = (float(*)[4])map.pData;
-    }
-
-    for (const D3DUniform *uniform : uniformArray)
-    {
-        if (uniform->isSampler())
-            continue;
-
-        unsigned int componentCount = (4 - uniform->registerElement);
-
-        // we assume that uniforms from structs are arranged in struct order in our uniforms list.
-        // otherwise we would overwrite previously written regions of memory.
-
-        if (uniform->isReferencedByVertexShader() && mapVS)
-        {
-            memcpy(&mapVS[uniform->vsRegisterIndex][uniform->registerElement], uniform->data,
-                   uniform->registerCount * sizeof(float) * componentCount);
-        }
-
-        if (uniform->isReferencedByFragmentShader() && mapPS)
-        {
-            memcpy(&mapPS[uniform->psRegisterIndex][uniform->registerElement], uniform->data,
-                   uniform->registerCount * sizeof(float) * componentCount);
-        }
-    }
-
-    if (mapVS)
-    {
-        mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
-    }
-
-    if (mapPS)
-    {
+        memcpy(map.pData, fragmentUniformStorage->getDataPointer(0, 0),
+               fragmentUniformStorage->size());
         mDeviceContext->Unmap(pixelConstantBuffer->get(), 0);
     }
 
@@ -4369,23 +4336,8 @@
         HRESULT result =
             mDeviceContext->Map(computeConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
         ASSERT(SUCCEEDED(result));
-        auto *mapCS = static_cast<float(*)[4]>(map.pData);
-
-        ASSERT(mapCS);
-        for (const D3DUniform *uniform : uniformArray)
-        {
-            ASSERT(uniform->isReferencedByComputeShader());
-
-            if (uniform->isSampler())
-            {
-                continue;
-            }
-
-            unsigned int componentCount = (4 - uniform->registerCount);
-            memcpy(&mapCS[uniform->csRegisterIndex][uniform->registerElement], uniform->data,
-                   uniform->registerCount * sizeof(float) * componentCount);
-        }
-
+        memcpy(map.pData, computeUniformStorage->getDataPointer(0, 0),
+               computeUniformStorage->size());
         mDeviceContext->Unmap(computeConstantBuffer, 0);
     }
 
diff --git a/src/libANGLE/renderer/d3d/d3d9/Renderer9.cpp b/src/libANGLE/renderer/d3d/d3d9/Renderer9.cpp
index 6e83fec..2e8bd2c 100644
--- a/src/libANGLE/renderer/d3d/d3d9/Renderer9.cpp
+++ b/src/libANGLE/renderer/d3d/d3d9/Renderer9.cpp
@@ -1854,8 +1854,13 @@
         if (!targetUniform->dirty)
             continue;
 
-        GLfloat *f = (GLfloat *)targetUniform->data;
-        GLint *i   = (GLint *)targetUniform->data;
+        // Built-in uniforms must be skipped.
+        if (!targetUniform->isReferencedByFragmentShader() &&
+            !targetUniform->isReferencedByVertexShader())
+            continue;
+
+        const GLfloat *f = reinterpret_cast<const GLfloat *>(targetUniform->firstNonNullData());
+        const GLint *i   = reinterpret_cast<const GLint *>(targetUniform->firstNonNullData());
 
         switch (targetUniform->type)
         {
diff --git a/src/tests/gl_tests/UniformTest.cpp b/src/tests/gl_tests/UniformTest.cpp
index 2611738..50e3233 100644
--- a/src/tests/gl_tests/UniformTest.cpp
+++ b/src/tests/gl_tests/UniformTest.cpp
@@ -416,7 +416,7 @@
 };
 
 // Test queries for transposed arrays of non-square matrix uniforms.
-TEST_P(UniformTestES3, TranposedMatrixArrayUniformStateQuery)
+TEST_P(UniformTestES3, TransposedMatrixArrayUniformStateQuery)
 {
     const std::string &vertexShader =
         "#version 300 es\n"