ES31: Support atomic functions on D3D11 - Part I

This patch is the first one of the implementation of atomic
functions in D3D11.

There are mainly two differences in the usage of GLSL and HLSL
atomic functions:
1. All GLSL atomic functions have return values, which all
   represent the original value of the shared or ssbo variable;
   while all HLSL atomic functions don't, and the original value
   can be stored in the last parameter of the function call.
2. For HLSL atomic functions, the last parameter that stores the
   original value is optional except for InterlockedExchange and
   InterlockedCompareExchange. Missing original_value in the call
   of InterlockedExchange and InterlockedCompareExchange results
   in a compile error from HLSL compiler.

To handle these differences, we plan to implement the translation
in two steps:
1. Support direct translations from GLSL atomic functions to HLSL
   ones.
Direct translation can only handle the following two situations:
(1) The statement is a GLSL atomic function call whose return value
    is unused, and it is not atomicExchange or atomicCompSwap:
    e.g.
    GLSL: atomicAdd(mem, value);
 -> HLSL: InterlockedAdd(mem, value);
(2) The statement is a simple assignment expression: its right-hand
    side is a GLSL atomic function call and its left-hand side is a
    previously declared variable.
    e.g.
    GLSL: oldValue = atomicAdd(mem, value);
 -> HLSL: InterlockedAdd(mem, value, oldValue);

2. Support atomic functions in the situations that direct translation
   cannot handle.
We will modify the intermediate tree to make direct translation work
on all these situations.
e.g.
   atomicExchange(mem, value);
-> int oldValue;
   oldValue = atomicExchange(mem, value);

   int oldValue = atomicAdd(mem, value);
-> int oldValue;
   oldValue = atomicAdd(mem, value);

   return atomicAdd(mem, value);
-> int temp;
   temp = atomicAdd(mem, value);
   return temp;

   for (i = 0; i < atomicAdd(mem, value); ++i)
-> int temp;
   temp = atomicAdd(mem, value);
   for (i = 0; i < temp; ++i)
   {
       ...
       temp = atomicAdd(mem, value);
   }

   int result = isTrue ? atomicAdd(mem, value) : 0;
-> int result;
   if (isTrue)
   {
       result = atomicAdd(mem, value);
   }
   else
   {
       result = 0;
   }

This patch completes Step 1, which mainly focuses on the direct
translation from GLSL atomic functions to HLSL ones.

BUG=angleproject:2682
TEST=angle_end2end_tests

Change-Id: I3b655b6e286dad4fd97f255f7fe87521c94db30c
Reviewed-on: https://chromium-review.googlesource.com/1121835
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Olli Etuaho <oetuaho@nvidia.com>
diff --git a/src/tests/gl_tests/ComputeShaderTest.cpp b/src/tests/gl_tests/ComputeShaderTest.cpp
index bade1f5..a22eaad 100644
--- a/src/tests/gl_tests/ComputeShaderTest.cpp
+++ b/src/tests/gl_tests/ComputeShaderTest.cpp
@@ -20,34 +20,35 @@
   protected:
     ComputeShaderTest() {}
 
-    template <GLint kWidth, GLint kHeight>
+    template <class T, GLint kWidth, GLint kHeight>
     void runSharedMemoryTest(const char *csSource,
-                             const std::array<GLuint, kWidth * kHeight> &inputData,
-                             const std::array<GLuint, kWidth * kHeight> &expectedValues)
+                             GLenum internalFormat,
+                             GLenum format,
+                             const std::array<T, kWidth * kHeight> &inputData,
+                             const std::array<T, kWidth * kHeight> &expectedValues)
     {
         GLTexture texture[2];
         GLFramebuffer framebuffer;
 
         glBindTexture(GL_TEXTURE_2D, texture[0]);
-        glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
+        glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format,
                         inputData.data());
         EXPECT_GL_NO_ERROR();
 
-        constexpr GLuint initData[kWidth * kHeight] = {};
+        constexpr T initData[kWidth * kHeight] = {};
         glBindTexture(GL_TEXTURE_2D, texture[1]);
-        glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
-                        initData);
+        glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format, initData);
         EXPECT_GL_NO_ERROR();
 
         ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
         glUseProgram(program.get());
 
-        glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
+        glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, internalFormat);
         EXPECT_GL_NO_ERROR();
 
-        glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
+        glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, internalFormat);
         EXPECT_GL_NO_ERROR();
 
         glDispatchCompute(1, 1, 1);
@@ -55,14 +56,14 @@
 
         glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
 
-        GLuint outputValues[kWidth * kHeight];
+        T outputValues[kWidth * kHeight] = {};
         glUseProgram(0);
         glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
 
         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture[1],
                                0);
         EXPECT_GL_NO_ERROR();
-        glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, outputValues);
+        glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, format, outputValues);
         EXPECT_GL_NO_ERROR();
 
         for (int i = 0; i < kWidth * kHeight; i++)
@@ -1386,7 +1387,8 @@
 
     const std::array<GLuint, 4> inputData      = {{250, 200, 150, 100}};
     const std::array<GLuint, 4> expectedValues = {{250, 200, 150, 250}};
-    runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
+    runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
+                                      expectedValues);
 }
 
 // Verify shared non-struct array variables can work correctly.
@@ -1410,7 +1412,8 @@
 
     const std::array<GLuint, 4> inputData      = {{250, 200, 150, 100}};
     const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
-    runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
+    runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
+                                      expectedValues);
 }
 
 // Verify shared struct array variables work correctly.
@@ -1438,7 +1441,111 @@
 
     const std::array<GLuint, 4> inputData      = {{250, 200, 150, 100}};
     const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
-    runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
+    runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
+                                      expectedValues);
+}
+
+// Verify using atomic functions without return value can work correctly.
+// TODO(jiawei.shao@intel.com): add test on atomicExchange and atomicCompSwap.
+TEST_P(ComputeShaderTest, AtomicFunctionsNoReturnValue)
+{
+    // TODO(jiawei.shao@intel.com): find out why this shader causes a link error on Android Nexus 5
+    // bot.
+    ANGLE_SKIP_TEST_IF(IsAndroid());
+
+    const char kCSShader[] =
+        R"(#version 310 es
+        layout (local_size_x = 6, local_size_y = 1, local_size_z = 1) in;
+        layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
+        layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
+
+        const uint kSumIndex = 0u;
+        const uint kMinIndex = 1u;
+        const uint kMaxIndex = 2u;
+        const uint kOrIndex = 3u;
+        const uint kAndIndex = 4u;
+        const uint kXorIndex = 5u;
+
+        shared highp uint results[6];
+
+        void main()
+        {
+            if (gl_LocalInvocationID.x == kMinIndex || gl_LocalInvocationID.x == kAndIndex)
+            {
+                results[gl_LocalInvocationID.x] = 0xFFFFu;
+            }
+            else
+            {
+                results[gl_LocalInvocationID.x] = 0u;
+            }
+            memoryBarrierShared();
+            barrier();
+
+            uint value = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
+            atomicAdd(results[kSumIndex], value);
+            atomicMin(results[kMinIndex], value);
+            atomicMax(results[kMaxIndex], value);
+            atomicOr(results[kOrIndex], value);
+            atomicAnd(results[kAndIndex], value);
+            atomicXor(results[kXorIndex], value);
+            memoryBarrierShared();
+            barrier();
+
+            imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
+                       uvec4(results[gl_LocalInvocationID.x]));
+        })";
+
+    const std::array<GLuint, 6> inputData      = {{1, 2, 4, 8, 16, 32}};
+    const std::array<GLuint, 6> expectedValues = {{63, 1, 32, 63, 0, 63}};
+    runSharedMemoryTest<GLuint, 6, 1>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
+                                      expectedValues);
+}
+
+// Verify using atomic functions in a non-initializer single assignment can work correctly.
+TEST_P(ComputeShaderTest, AtomicFunctionsInNonInitializerSingleAssignment)
+{
+    const char kCSShader[] =
+        R"(#version 310 es
+        layout (local_size_x = 9, local_size_y = 1, local_size_z = 1) in;
+        layout (r32i, binding = 0) readonly uniform highp iimage2D srcImage;
+        layout (r32i, binding = 1) writeonly uniform highp iimage2D dstImage;
+
+        shared highp int sharedVariable;
+
+        shared highp int inputData[9];
+        shared highp int outputData[9];
+
+        void main()
+        {
+            int inputValue = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
+            inputData[gl_LocalInvocationID.x] = inputValue;
+            memoryBarrierShared();
+            barrier();
+
+            if (gl_LocalInvocationID.x == 0u)
+            {
+                sharedVariable = 0;
+
+                outputData[0] = atomicAdd(sharedVariable, inputData[0]);
+                outputData[1] = atomicMin(sharedVariable, inputData[1]);
+                outputData[2] = atomicMax(sharedVariable, inputData[2]);
+                outputData[3] = atomicAnd(sharedVariable, inputData[3]);
+                outputData[4] = atomicOr(sharedVariable, inputData[4]);
+                outputData[5] = atomicXor(sharedVariable, inputData[5]);
+                outputData[6] = atomicExchange(sharedVariable, inputData[6]);
+                outputData[7] = atomicCompSwap(sharedVariable, 64, inputData[7]);
+                outputData[8] = atomicAdd(sharedVariable, inputData[8]);
+            }
+            memoryBarrierShared();
+            barrier();
+
+            imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
+                       ivec4(outputData[gl_LocalInvocationID.x]));
+        })";
+
+    const std::array<GLint, 9> inputData      = {{1, 2, 4, 8, 16, 32, 64, 128, 1}};
+    const std::array<GLint, 9> expectedValues = {{0, 1, 1, 4, 0, 16, 48, 64, 128}};
+    runSharedMemoryTest<GLint, 9, 1>(kCSShader, GL_R32I, GL_INT, inputData, expectedValues);
 }
 
 // Check that it is not possible to create a compute shader when the context does not support ES