Redesign samplers in shaders on D3D11

Translation of samplers to HLSL on D3D11 is changed as follows:

Instead of passing around HLSL sampler and HLSL texture references in
shaders, all references to ESSL samplers are converted to constant
indices within the shader body. Each ESSL sampler is identified by a
unique index. In the code generated to implement ESSL texture functions,
these indices are used to index arrays of HLSL samplers and HLSL
textures to get the sampler and texture to use.

HLSL textures and samplers are grouped into arrays by their types. Each
unique combination of an HLSL texture type and an HLSL sampler type gets
its own array. To convert a unique sampler index to an index into one of
these arrays, a constant per-group offset is subtracted. In the most
common case of a 2D texture and a regular (non-comparison) sampler, the
index offset is always zero and is omitted.

The end goal of this refactoring is to make adding extra metadata for
samplers easier. The unique sampler index can be used in follow-up
changes to index an array of metadata passed in uniforms, which can
contain such things as the base level of the texture.

This does not solve the issues with samplers in structs.

The interface from the point of view of libANGLE is still exactly the
same; the only thing that changes is how samplers are handled inside the
shader.

On feature level 9_3, the D3D compiler has a bug where it can report that
the maximum sampler index is exceeded when in fact it is not. This can
happen when an array of samplers is declared in the shader. Because of
this, the new approach can't be used on D3D11 feature level 9_3; that
feature level continues to use the old approach instead.

BUG=angleproject:1261
TEST=angle_end2end_tests,
     dEQP-GLES3.functional.shaders.texture_functions.* (no regressions)
     dEQP-GLES3.functional.texture.units.* (no regressions)

Change-Id: I5fbb0c4280000202dc2795a628b56bd8194ef96f
Reviewed-on: https://chromium-review.googlesource.com/320571
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Tested-by: Olli Etuaho <oetuaho@nvidia.com>
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
Tryjob-Request: Olli Etuaho <oetuaho@nvidia.com>
diff --git a/src/compiler/translator/CodeGen.cpp b/src/compiler/translator/CodeGen.cpp
index 681e7ae..f099bcc 100644
--- a/src/compiler/translator/CodeGen.cpp
+++ b/src/compiler/translator/CodeGen.cpp
@@ -51,8 +51,9 @@
         // configuration. Return NULL per the ShConstructCompiler API.
         return nullptr;
 #endif // ANGLE_ENABLE_GLSL
-      case SH_HLSL9_OUTPUT:
-      case SH_HLSL11_OUTPUT:
+      case SH_HLSL_3_0_OUTPUT:
+      case SH_HLSL_4_1_OUTPUT:
+      case SH_HLSL_4_0_FL9_3_OUTPUT:
 #ifdef ANGLE_ENABLE_HLSL
         return new TranslatorHLSL(type, spec, output);
 #else
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 634f3df..170875e 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -83,19 +83,9 @@
 {
     TString name = "gl_texture";
 
-    if (IsSampler2D(sampler))
-    {
-        name += "2D";
-    }
-    else if (IsSampler3D(sampler))
-    {
-        name += "3D";
-    }
-    else if (IsSamplerCube(sampler))
-    {
-        name += "Cube";
-    }
-    else UNREACHABLE();
+    // We need to include the full sampler type in the function name to make the signature unique
+    // on D3D11, where samplers are passed to texture functions as indices.
+    name += TextureTypeSuffix(this->sampler);
 
     if (proj)
     {
@@ -185,7 +175,7 @@
     mStructureHLSL = new StructureHLSL;
     mUniformHLSL = new UniformHLSL(mStructureHLSL, outputType, uniforms);
 
-    if (mOutputType == SH_HLSL9_OUTPUT)
+    if (mOutputType == SH_HLSL_3_0_OUTPUT)
     {
         // Fragment shaders need dx_DepthRange, dx_ViewCoords and dx_DepthFront.
         // Vertex shaders need a slightly different set: dx_DepthRange, dx_ViewCoords and dx_ViewAdjust.
@@ -372,7 +362,7 @@
 
     out << mStructureHLSL->structsHeader();
 
-    out << mUniformHLSL->uniformsHeader(mOutputType, mReferencedUniforms);
+    mUniformHLSL->uniformsHeader(out, mOutputType, mReferencedUniforms);
     out << mUniformHLSL->interfaceBlocksHeader(mReferencedInterfaceBlocks);
 
     if (!mEqualityFunctions.empty())
@@ -495,7 +485,7 @@
                    "\n";
         }
 
-        if (mOutputType == SH_HLSL11_OUTPUT)
+        if (mOutputType == SH_HLSL_4_1_OUTPUT || mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
         {
             out << "cbuffer DriverConstants : register(b1)\n"
                    "{\n";
@@ -599,7 +589,7 @@
                    "\n";
         }
 
-        if (mOutputType == SH_HLSL11_OUTPUT)
+        if (mOutputType == SH_HLSL_4_1_OUTPUT || mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
         {
             out << "cbuffer DriverConstants : register(b1)\n"
                     "{\n";
@@ -699,7 +689,7 @@
         // Argument list
         int hlslCoords = 4;
 
-        if (mOutputType == SH_HLSL9_OUTPUT)
+        if (mOutputType == SH_HLSL_3_0_OUTPUT)
         {
             switch(textureFunction->sampler)
             {
@@ -718,29 +708,20 @@
               default: UNREACHABLE();
             }
         }
-        else if (mOutputType == SH_HLSL11_OUTPUT)
+        else
         {
-            switch(textureFunction->sampler)
+            hlslCoords = HLSLTextureCoordsCount(textureFunction->sampler);
+            if (mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
             {
-              case EbtSampler2D:            out << "Texture2D x, SamplerState s";                hlslCoords = 2; break;
-              case EbtSampler3D:            out << "Texture3D x, SamplerState s";                hlslCoords = 3; break;
-              case EbtSamplerCube:          out << "TextureCube x, SamplerState s";              hlslCoords = 3; break;
-              case EbtSampler2DArray:       out << "Texture2DArray x, SamplerState s";           hlslCoords = 3; break;
-              case EbtISampler2D:           out << "Texture2D<int4> x, SamplerState s";          hlslCoords = 2; break;
-              case EbtISampler3D:           out << "Texture3D<int4> x, SamplerState s";          hlslCoords = 3; break;
-              case EbtISamplerCube:         out << "Texture2DArray<int4> x, SamplerState s";     hlslCoords = 3; break;
-              case EbtISampler2DArray:      out << "Texture2DArray<int4> x, SamplerState s";     hlslCoords = 3; break;
-              case EbtUSampler2D:           out << "Texture2D<uint4> x, SamplerState s";         hlslCoords = 2; break;
-              case EbtUSampler3D:           out << "Texture3D<uint4> x, SamplerState s";         hlslCoords = 3; break;
-              case EbtUSamplerCube:         out << "Texture2DArray<uint4> x, SamplerState s";    hlslCoords = 3; break;
-              case EbtUSampler2DArray:      out << "Texture2DArray<uint4> x, SamplerState s";    hlslCoords = 3; break;
-              case EbtSampler2DShadow:      out << "Texture2D x, SamplerComparisonState s";      hlslCoords = 2; break;
-              case EbtSamplerCubeShadow:    out << "TextureCube x, SamplerComparisonState s";    hlslCoords = 3; break;
-              case EbtSampler2DArrayShadow: out << "Texture2DArray x, SamplerComparisonState s"; hlslCoords = 3; break;
-              default: UNREACHABLE();
+                out << TextureString(textureFunction->sampler) << " x, "
+                    << SamplerString(textureFunction->sampler) << " s";
+            }
+            else
+            {
+                ASSERT(mOutputType == SH_HLSL_4_1_OUTPUT);
+                out << "const uint samplerIndex";
             }
         }
-        else UNREACHABLE();
 
         if (textureFunction->method == TextureFunction::FETCH)   // Integer coordinates
         {
@@ -831,6 +812,31 @@
         out << ")\n"
                "{\n";
 
+        // In some cases we use a variable to store the texture/sampler objects, but to work around
+        // a D3D11 compiler bug related to discard inside a loop that is conditional on texture
+        // sampling we need to call the function directly on a reference to the array. The bug was
+        // found using dEQP-GLES3.functional.shaders.discard*loop_texture* tests.
+        TString textureReference("x");
+        TString samplerReference("s");
+        if (mOutputType == SH_HLSL_4_1_OUTPUT)
+        {
+            TString suffix = TextureGroupSuffix(textureFunction->sampler);
+            if (TextureGroup(textureFunction->sampler) == HLSL_TEXTURE_2D)
+            {
+                textureReference = TString("textures") + suffix + "[samplerIndex]";
+                samplerReference = TString("samplers") + suffix + "[samplerIndex]";
+            }
+            else
+            {
+                out << "    const uint textureIndex = samplerIndex - textureIndexOffset" << suffix
+                    << ";\n";
+                textureReference = TString("textures") + suffix + "[textureIndex]";
+                out << "    const uint samplerArrayIndex = samplerIndex - samplerIndexOffset"
+                    << suffix << ";\n";
+                samplerReference = TString("samplers") + suffix + "[samplerArrayIndex]";
+            }
+        }
+
         if (textureFunction->method == TextureFunction::SIZE)
         {
             if (IsSampler2D(textureFunction->sampler) || IsSamplerCube(textureFunction->sampler))
@@ -838,18 +844,21 @@
                 if (IsSamplerArray(textureFunction->sampler))
                 {
                     out << "    uint width; uint height; uint layers; uint numberOfLevels;\n"
-                           "    x.GetDimensions(lod, width, height, layers, numberOfLevels);\n";
+                        << "    " << textureReference
+                        << ".GetDimensions(lod, width, height, layers, numberOfLevels);\n";
                 }
                 else
                 {
                     out << "    uint width; uint height; uint numberOfLevels;\n"
-                           "    x.GetDimensions(lod, width, height, numberOfLevels);\n";
+                        << "    " << textureReference
+                        << ".GetDimensions(lod, width, height, numberOfLevels);\n";
                 }
             }
             else if (IsSampler3D(textureFunction->sampler))
             {
                 out << "    uint width; uint height; uint depth; uint numberOfLevels;\n"
-                       "    x.GetDimensions(lod, width, height, depth, numberOfLevels);\n";
+                    << "    " << textureReference
+                    << ".GetDimensions(lod, width, height, depth, numberOfLevels);\n";
             }
             else UNREACHABLE();
 
@@ -881,7 +890,8 @@
 
                 out << "    uint mip = 0;\n";
 
-                out << "    x.GetDimensions(mip, width, height, layers, levels);\n";
+                out << "    " << textureReference
+                    << ".GetDimensions(mip, width, height, layers, levels);\n";
 
                 out << "    bool xMajor = abs(t.x) > abs(t.y) && abs(t.x) > abs(t.z);\n";
                 out << "    bool yMajor = abs(t.y) > abs(t.z) && abs(t.y) > abs(t.x);\n";
@@ -911,7 +921,8 @@
                            "    float2 dy = ddy(tSized);\n"
                            "    float lod = 0.5f * log2(max(dot(dx, dx), dot(dy, dy)));\n"
                            "    mip = uint(min(max(round(lod), 0), levels - 1));\n"
-                           "    x.GetDimensions(mip, width, height, layers, levels);\n";
+                        << "    " << textureReference
+                        << ".GetDimensions(mip, width, height, layers, levels);\n";
                 }
             }
             else if (IsIntegerSampler(textureFunction->sampler) &&
@@ -934,7 +945,8 @@
                         else
                         {
 
-                            out << "    x.GetDimensions(0, width, height, layers, levels);\n";
+                            out << "    " << textureReference
+                                << ".GetDimensions(0, width, height, layers, levels);\n";
                             if (textureFunction->method == TextureFunction::IMPLICIT ||
                                 textureFunction->method == TextureFunction::BIAS)
                             {
@@ -956,7 +968,8 @@
                             out << "    uint mip = uint(min(max(round(lod), 0), levels - 1));\n";
                         }
 
-                        out << "    x.GetDimensions(mip, width, height, layers, levels);\n";
+                        out << "    " << textureReference
+                            << ".GetDimensions(mip, width, height, layers, levels);\n";
                     }
                     else
                     {
@@ -972,7 +985,8 @@
                         }
                         else
                         {
-                            out << "    x.GetDimensions(0, width, height, levels);\n";
+                            out << "    " << textureReference
+                                << ".GetDimensions(0, width, height, levels);\n";
 
                             if (textureFunction->method == TextureFunction::IMPLICIT ||
                                 textureFunction->method == TextureFunction::BIAS)
@@ -995,7 +1009,8 @@
                             out << "    uint mip = uint(min(max(round(lod), 0), levels - 1));\n";
                         }
 
-                        out << "    x.GetDimensions(mip, width, height, levels);\n";
+                        out << "    " << textureReference
+                            << ".GetDimensions(mip, width, height, levels);\n";
                     }
                 }
                 else if (IsSampler3D(textureFunction->sampler))
@@ -1012,7 +1027,8 @@
                     }
                     else
                     {
-                        out << "    x.GetDimensions(0, width, height, depth, levels);\n";
+                        out << "    " << textureReference
+                            << ".GetDimensions(0, width, height, depth, levels);\n";
 
                         if (textureFunction->method == TextureFunction::IMPLICIT ||
                             textureFunction->method == TextureFunction::BIAS)
@@ -1036,7 +1052,8 @@
                         out << "    uint mip = uint(min(max(round(lod), 0), levels - 1));\n";
                     }
 
-                    out << "    x.GetDimensions(mip, width, height, depth, levels);\n";
+                    out << "    " << textureReference
+                        << ".GetDimensions(mip, width, height, depth, levels);\n";
                 }
                 else UNREACHABLE();
             }
@@ -1044,7 +1061,7 @@
             out << "    return ";
 
             // HLSL intrinsic
-            if (mOutputType == SH_HLSL9_OUTPUT)
+            if (mOutputType == SH_HLSL_3_0_OUTPUT)
             {
                 switch(textureFunction->sampler)
                 {
@@ -1055,45 +1072,71 @@
 
                 switch(textureFunction->method)
                 {
-                  case TextureFunction::IMPLICIT: out << "(s, ";     break;
-                  case TextureFunction::BIAS:     out << "bias(s, "; break;
-                  case TextureFunction::LOD:      out << "lod(s, ";  break;
-                  case TextureFunction::LOD0:     out << "lod(s, ";  break;
-                  case TextureFunction::LOD0BIAS: out << "lod(s, ";  break;
+                    case TextureFunction::IMPLICIT:
+                        out << "(" << samplerReference << ", ";
+                        break;
+                    case TextureFunction::BIAS:
+                        out << "bias(" << samplerReference << ", ";
+                        break;
+                    case TextureFunction::LOD:
+                        out << "lod(" << samplerReference << ", ";
+                        break;
+                    case TextureFunction::LOD0:
+                        out << "lod(" << samplerReference << ", ";
+                        break;
+                    case TextureFunction::LOD0BIAS:
+                        out << "lod(" << samplerReference << ", ";
+                        break;
                   default: UNREACHABLE();
                 }
             }
-            else if (mOutputType == SH_HLSL11_OUTPUT)
+            else if (mOutputType == SH_HLSL_4_1_OUTPUT || mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
             {
                 if (textureFunction->method == TextureFunction::GRAD)
                 {
                     if (IsIntegerSampler(textureFunction->sampler))
                     {
-                        out << "x.Load(";
+                        out << "" << textureReference << ".Load(";
                     }
                     else if (IsShadowSampler(textureFunction->sampler))
                     {
-                        out << "x.SampleCmpLevelZero(s, ";
+                        out << "" << textureReference << ".SampleCmpLevelZero(" << samplerReference
+                            << ", ";
                     }
                     else
                     {
-                        out << "x.SampleGrad(s, ";
+                        out << "" << textureReference << ".SampleGrad(" << samplerReference << ", ";
                     }
                 }
                 else if (IsIntegerSampler(textureFunction->sampler) ||
                          textureFunction->method == TextureFunction::FETCH)
                 {
-                    out << "x.Load(";
+                    out << "" << textureReference << ".Load(";
                 }
                 else if (IsShadowSampler(textureFunction->sampler))
                 {
                     switch(textureFunction->method)
                     {
-                      case TextureFunction::IMPLICIT: out << "x.SampleCmp(s, ";          break;
-                      case TextureFunction::BIAS:     out << "x.SampleCmp(s, ";          break;
-                      case TextureFunction::LOD:      out << "x.SampleCmp(s, ";          break;
-                      case TextureFunction::LOD0:     out << "x.SampleCmpLevelZero(s, "; break;
-                      case TextureFunction::LOD0BIAS: out << "x.SampleCmpLevelZero(s, "; break;
+                        case TextureFunction::IMPLICIT:
+                            out << "" << textureReference << ".SampleCmp(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::BIAS:
+                            out << "" << textureReference << ".SampleCmp(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::LOD:
+                            out << "" << textureReference << ".SampleCmp(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::LOD0:
+                            out << "" << textureReference << ".SampleCmpLevelZero("
+                                << samplerReference << ", ";
+                            break;
+                        case TextureFunction::LOD0BIAS:
+                            out << "" << textureReference << ".SampleCmpLevelZero("
+                                << samplerReference << ", ";
+                            break;
                       default: UNREACHABLE();
                     }
                 }
@@ -1101,11 +1144,25 @@
                 {
                     switch(textureFunction->method)
                     {
-                      case TextureFunction::IMPLICIT: out << "x.Sample(s, ";      break;
-                      case TextureFunction::BIAS:     out << "x.SampleBias(s, ";  break;
-                      case TextureFunction::LOD:      out << "x.SampleLevel(s, "; break;
-                      case TextureFunction::LOD0:     out << "x.SampleLevel(s, "; break;
-                      case TextureFunction::LOD0BIAS: out << "x.SampleLevel(s, "; break;
+                        case TextureFunction::IMPLICIT:
+                            out << "" << textureReference << ".Sample(" << samplerReference << ", ";
+                            break;
+                        case TextureFunction::BIAS:
+                            out << "" << textureReference << ".SampleBias(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::LOD:
+                            out << "" << textureReference << ".SampleLevel(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::LOD0:
+                            out << "" << textureReference << ".SampleLevel(" << samplerReference
+                                << ", ";
+                            break;
+                        case TextureFunction::LOD0BIAS:
+                            out << "" << textureReference << ".SampleLevel(" << samplerReference
+                                << ", ";
+                            break;
                       default: UNREACHABLE();
                     }
                 }
@@ -1175,7 +1232,7 @@
 
             out << addressx + ("t.x" + proj) + close + ", " + addressy + ("t.y" + proj) + close;
 
-            if (mOutputType == SH_HLSL9_OUTPUT)
+            if (mOutputType == SH_HLSL_3_0_OUTPUT)
             {
                 if (hlslCoords >= 3)
                 {
@@ -1203,7 +1260,7 @@
 
                 out << "));\n";
             }
-            else if (mOutputType == SH_HLSL11_OUTPUT)
+            else if (mOutputType == SH_HLSL_4_1_OUTPUT || mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
             {
                 if (hlslCoords >= 3)
                 {
@@ -2393,7 +2450,8 @@
 
             for (TIntermSequence::iterator arg = arguments->begin(); arg != arguments->end(); arg++)
             {
-                if (mOutputType == SH_HLSL11_OUTPUT && IsSampler((*arg)->getAsTyped()->getBasicType()))
+                if (mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT &&
+                    IsSampler((*arg)->getAsTyped()->getBasicType()))
                 {
                     out << "texture_";
                     (*arg)->traverse(this);
@@ -2727,7 +2785,7 @@
 
     TInfoSinkBase &out = getInfoSink();
 
-    if (mOutputType == SH_HLSL9_OUTPUT)
+    if (mOutputType == SH_HLSL_3_0_OUTPUT)
     {
         if (handleExcessiveLoop(out, node))
         {
@@ -3154,11 +3212,21 @@
         nameStr = DecorateIfNeeded(name);
     }
 
-    if (mOutputType == SH_HLSL11_OUTPUT && IsSampler(type.getBasicType()))
+    if (IsSampler(type.getBasicType()))
     {
-        return QualifierString(qualifier) + " " + TextureString(type) + " texture_" + nameStr +
-               ArrayString(type) + ", " + QualifierString(qualifier) + " " + SamplerString(type) +
-               " sampler_" + nameStr + ArrayString(type);
+        if (mOutputType == SH_HLSL_4_1_OUTPUT)
+        {
+            // Samplers are passed as indices to the sampler array.
+            ASSERT(qualifier != EvqOut && qualifier != EvqInOut);
+            return "const uint " + nameStr + ArrayString(type);
+        }
+        if (mOutputType == SH_HLSL_4_0_FL9_3_OUTPUT)
+        {
+            return QualifierString(qualifier) + " " + TextureString(type.getBasicType()) +
+                   " texture_" + nameStr + ArrayString(type) + ", " + QualifierString(qualifier) +
+                   " " + SamplerString(type.getBasicType()) + " sampler_" + nameStr +
+                   ArrayString(type);
+        }
     }
 
     return QualifierString(qualifier) + " " + TypeString(type) + " " + nameStr + ArrayString(type);
diff --git a/src/compiler/translator/TranslatorHLSL.cpp b/src/compiler/translator/TranslatorHLSL.cpp
index 462f887..c5d18d2 100644
--- a/src/compiler/translator/TranslatorHLSL.cpp
+++ b/src/compiler/translator/TranslatorHLSL.cpp
@@ -47,7 +47,7 @@
 
     // Work around D3D9 bug that would manifest in vertex shaders with selection blocks which
     // use a vertex attribute as a condition, and some related computation in the else block.
-    if (getOutputType() == SH_HLSL9_OUTPUT && getShaderType() == GL_VERTEX_SHADER)
+    if (getOutputType() == SH_HLSL_3_0_OUTPUT && getShaderType() == GL_VERTEX_SHADER)
     {
         sh::RewriteElseBlocks(root, getTemporaryIndex());
     }
diff --git a/src/compiler/translator/UniformHLSL.cpp b/src/compiler/translator/UniformHLSL.cpp
index 71659fe..20961c4 100644
--- a/src/compiler/translator/UniformHLSL.cpp
+++ b/src/compiler/translator/UniformHLSL.cpp
@@ -93,7 +93,9 @@
     return NULL;
 }
 
-unsigned int UniformHLSL::declareUniformAndAssignRegister(const TType &type, const TString &name)
+unsigned int UniformHLSL::declareUniformAndAssignRegister(const TType &type,
+                                                          const TString &name,
+                                                          unsigned int *registerCount)
 {
     unsigned int registerIndex = (IsSampler(type.getBasicType()) ? mSamplerRegister : mUniformRegister);
 
@@ -102,43 +104,119 @@
 
     mUniformRegisterMap[uniform->name] = registerIndex;
 
-    unsigned int registerCount = HLSLVariableRegisterCount(*uniform, mOutputType);
+    ASSERT(registerCount);
+    *registerCount = HLSLVariableRegisterCount(*uniform, mOutputType);
 
     if (gl::IsSamplerType(uniform->type))
     {
-        mSamplerRegister += registerCount;
+        mSamplerRegister += *registerCount;
     }
     else
     {
-        mUniformRegister += registerCount;
+        mUniformRegister += *registerCount;
     }
 
     return registerIndex;
 }
 
-TString UniformHLSL::uniformsHeader(ShShaderOutput outputType, const ReferencedSymbols &referencedUniforms)
+unsigned int UniformHLSL::declareUniformAndAssignRegister(const TType &type, const TString &name)
 {
-    TString uniforms;
+    unsigned int registerCount;
+    return declareUniformAndAssignRegister(type, name, &registerCount);
+}
 
-    for (ReferencedSymbols::const_iterator uniformIt = referencedUniforms.begin();
-         uniformIt != referencedUniforms.end(); uniformIt++)
+void UniformHLSL::outputHLSLSamplerUniformGroup(TInfoSinkBase &out,
+                                                const HLSLTextureSamplerGroup textureGroup,
+                                                const TVector<const TIntermSymbol *> &group,
+                                                unsigned int *groupTextureRegisterIndex)
+{
+    if (group.empty())
     {
-        const TIntermSymbol &uniform = *uniformIt->second;
-        const TType &type = uniform.getType();
-        const TString &name = uniform.getSymbol();
-
-        unsigned int registerIndex = declareUniformAndAssignRegister(type, name);
-
-        if (outputType == SH_HLSL11_OUTPUT && IsSampler(type.getBasicType()))   // Also declare the texture
+        return;
+    }
+    unsigned int groupRegisterCount = 0;
+    for (const TIntermSymbol *uniform : group)
+    {
+        const TType &type   = uniform->getType();
+        const TString &name = uniform->getSymbol();
+        unsigned int registerCount;
+        unsigned int samplerArrayIndex =
+            declareUniformAndAssignRegister(type, name, &registerCount);
+        groupRegisterCount += registerCount;
+        if (type.isArray())
         {
-            uniforms += "uniform " + SamplerString(type) + " sampler_" + DecorateUniform(name, type) + ArrayString(type) +
-                        " : register(s" + str(registerIndex) + ");\n";
-
-            uniforms += "uniform " + TextureString(type) + " texture_" + DecorateUniform(name, type) + ArrayString(type) +
-                        " : register(t" + str(registerIndex) + ");\n";
+            out << "static const uint " << DecorateIfNeeded(uniform->getName()) << ArrayString(type)
+                << " = {";
+            for (int i = 0; i < type.getArraySize(); ++i)
+            {
+                if (i > 0)
+                    out << ", ";
+                out << (samplerArrayIndex + i);
+            }
+            out << "};\n";
         }
         else
         {
+            out << "static const uint " << DecorateIfNeeded(uniform->getName()) << " = "
+                << samplerArrayIndex << ";\n";
+        }
+    }
+    TString suffix = TextureGroupSuffix(textureGroup);
+    // Since HLSL_TEXTURE_2D is the first group, it has a fixed offset of zero.
+    if (textureGroup != HLSL_TEXTURE_2D)
+    {
+        out << "static const uint textureIndexOffset" << suffix << " = "
+            << (*groupTextureRegisterIndex) << ";\n";
+        out << "static const uint samplerIndexOffset" << suffix << " = "
+            << (*groupTextureRegisterIndex) << ";\n";
+    }
+    out << "uniform " << TextureString(textureGroup) << " textures" << suffix << "["
+        << groupRegisterCount << "]"
+        << " : register(t" << (*groupTextureRegisterIndex) << ");\n";
+    out << "uniform " << SamplerString(textureGroup) << " samplers" << suffix << "["
+        << groupRegisterCount << "]"
+        << " : register(s" << (*groupTextureRegisterIndex) << ");\n";
+    *groupTextureRegisterIndex += groupRegisterCount;
+}
+
+void UniformHLSL::uniformsHeader(TInfoSinkBase &out,
+                                 ShShaderOutput outputType,
+                                 const ReferencedSymbols &referencedUniforms)
+{
+    if (!referencedUniforms.empty())
+    {
+        out << "// Uniforms\n\n";
+    }
+    // In the case of HLSL 4, sampler uniforms need to be grouped by type before the code is
+    // written. They are grouped based on the combination of the HLSL texture type and
+    // HLSL sampler type, enumerated in HLSLTextureSamplerGroup.
+    TVector<TVector<const TIntermSymbol *>> groupedSamplerUniforms;
+    groupedSamplerUniforms.resize(HLSL_TEXTURE_MAX + 1);
+    for (auto &uniformIt : referencedUniforms)
+    {
+        // Output regular uniforms. Group sampler uniforms by type.
+        const TIntermSymbol &uniform = *uniformIt.second;
+        const TType &type = uniform.getType();
+        const TString &name = uniform.getSymbol();
+
+        if (outputType == SH_HLSL_4_1_OUTPUT && IsSampler(type.getBasicType()))
+        {
+            HLSLTextureSamplerGroup group = TextureGroup(type.getBasicType());
+            groupedSamplerUniforms[group].push_back(&uniform);
+        }
+        else if (outputType == SH_HLSL_4_0_FL9_3_OUTPUT && IsSampler(type.getBasicType()))
+        {
+            unsigned int registerIndex = declareUniformAndAssignRegister(type, name);
+            out << "uniform " << SamplerString(type.getBasicType()) << " sampler_"
+                << DecorateUniform(name, type) << ArrayString(type) << " : register(s"
+                << str(registerIndex) << ");\n";
+            out << "uniform " << TextureString(type.getBasicType()) << " texture_"
+                << DecorateUniform(name, type) << ArrayString(type) << " : register(t"
+                << str(registerIndex) << ");\n";
+        }
+        else
+        {
+            unsigned int registerIndex  = declareUniformAndAssignRegister(type, name);
             const TStructure *structure = type.getStruct();
             // If this is a nameless struct, we need to use its full definition, rather than its (empty) name.
             // TypeString() will invoke defineNameless in this case; qualifier prefixes are unnecessary for 
@@ -149,11 +227,23 @@
 
             const TString &registerString = TString("register(") + UniformRegisterPrefix(type) + str(registerIndex) + ")";
 
-            uniforms += "uniform " + typeName + " " + DecorateUniform(name, type) + ArrayString(type) + " : " + registerString + ";\n";
+            out << "uniform " << typeName << " " << DecorateUniform(name, type) << ArrayString(type)
+                << " : " << registerString << ";\n";
         }
     }
 
-    return (uniforms.empty() ? "" : ("// Uniforms\n\n" + uniforms));
+    if (outputType == SH_HLSL_4_1_OUTPUT)
+    {
+        unsigned int groupTextureRegisterIndex = 0;
+        // TEXTURE_2D is special, index offset is assumed to be 0 and omitted in that case.
+        ASSERT(HLSL_TEXTURE_MIN == HLSL_TEXTURE_2D);
+        for (int groupId = HLSL_TEXTURE_MIN; groupId < HLSL_TEXTURE_MAX; ++groupId)
+        {
+            outputHLSLSamplerUniformGroup(out, HLSLTextureSamplerGroup(groupId),
+                                          groupedSamplerUniforms[groupId],
+                                          &groupTextureRegisterIndex);
+        }
+    }
 }
 
 TString UniformHLSL::interfaceBlocksHeader(const ReferencedSymbols &referencedInterfaceBlocks)
diff --git a/src/compiler/translator/UniformHLSL.h b/src/compiler/translator/UniformHLSL.h
index 4ab9ccd..0f51f34 100644
--- a/src/compiler/translator/UniformHLSL.h
+++ b/src/compiler/translator/UniformHLSL.h
@@ -11,6 +11,7 @@
 #define COMPILER_TRANSLATOR_UNIFORMHLSL_H_
 
 #include "compiler/translator/OutputHLSL.h"
+#include "compiler/translator/UtilsHLSL.h"
 
 namespace sh
 {
@@ -23,7 +24,13 @@
 
     void reserveUniformRegisters(unsigned int registerCount);
     void reserveInterfaceBlockRegisters(unsigned int registerCount);
-    TString uniformsHeader(ShShaderOutput outputType, const ReferencedSymbols &referencedUniforms);
+    void outputHLSLSamplerUniformGroup(TInfoSinkBase &out,
+                                       const HLSLTextureSamplerGroup textureGroup,
+                                       const TVector<const TIntermSymbol *> &group,
+                                       unsigned int *groupTextureRegisterIndex);
+    void uniformsHeader(TInfoSinkBase &out,
+                        ShShaderOutput outputType,
+                        const ReferencedSymbols &referencedUniforms);
     TString interfaceBlocksHeader(const ReferencedSymbols &referencedInterfaceBlocks);
 
     // Used for direct index references
@@ -45,6 +52,9 @@
     const Uniform *findUniformByName(const TString &name) const;
 
     // Returns the uniform's register index
+    unsigned int declareUniformAndAssignRegister(const TType &type,
+                                                 const TString &name,
+                                                 unsigned int *registerCount);
     unsigned int declareUniformAndAssignRegister(const TType &type, const TString &name);
 
     unsigned int mUniformRegister;
diff --git a/src/compiler/translator/UtilsHLSL.cpp b/src/compiler/translator/UtilsHLSL.cpp
index c0ceef9..404ccee 100644
--- a/src/compiler/translator/UtilsHLSL.cpp
+++ b/src/compiler/translator/UtilsHLSL.cpp
@@ -15,9 +15,9 @@
 namespace sh
 {
 
-TString SamplerString(const TType &type)
+TString SamplerString(const TBasicType type)
 {
-    if (IsShadowSampler(type.getBasicType()))
+    if (IsShadowSampler(type))
     {
         return "SamplerComparisonState";
     }
@@ -27,32 +27,158 @@
     }
 }
 
-TString TextureString(const TType &type)
+TString SamplerString(HLSLTextureSamplerGroup type)  // HLSL sampler type for a group
 {
-    switch (type.getBasicType())
+    if (type >= HLSL_COMPARISON_SAMPLER_GROUP_BEGIN && type <= HLSL_COMPARISON_SAMPLER_GROUP_END)
     {
-      case EbtSampler2D:            return "Texture2D";
-      case EbtSamplerCube:          return "TextureCube";
-      case EbtSamplerExternalOES:   return "Texture2D";
-      case EbtSampler2DArray:       return "Texture2DArray";
-      case EbtSampler3D:            return "Texture3D";
-      case EbtISampler2D:           return "Texture2D<int4>";
-      case EbtISampler3D:           return "Texture3D<int4>";
-      case EbtISamplerCube:         return "Texture2DArray<int4>";
-      case EbtISampler2DArray:      return "Texture2DArray<int4>";
-      case EbtUSampler2D:           return "Texture2D<uint4>";
-      case EbtUSampler3D:           return "Texture3D<uint4>";
-      case EbtUSamplerCube:         return "Texture2DArray<uint4>";
-      case EbtUSampler2DArray:      return "Texture2DArray<uint4>";
-      case EbtSampler2DShadow:      return "Texture2D";
-      case EbtSamplerCubeShadow:    return "TextureCube";
-      case EbtSampler2DArrayShadow: return "Texture2DArray";
-      default: UNREACHABLE();
+        return "SamplerComparisonState";  // Group is within the inclusive comparison range
+    }
+    else
+    {
+        return "SamplerState";  // Every other group
+    }
+}
+
+// Maps an ESSL sampler basic type to its HLSL texture/sampler group.
+HLSLTextureSamplerGroup TextureGroup(const TBasicType type)
+{
+    switch (type)
+    {
+        case EbtSampler2D:
+        case EbtSamplerExternalOES:
+            return HLSL_TEXTURE_2D;
+        case EbtSamplerCube:
+            return HLSL_TEXTURE_CUBE;
+        case EbtSampler2DArray:
+            return HLSL_TEXTURE_2D_ARRAY;
+        case EbtSampler3D:
+            return HLSL_TEXTURE_3D;
+        case EbtISampler2D:
+            return HLSL_TEXTURE_2D_INT4;
+        case EbtISampler3D:
+            return HLSL_TEXTURE_3D_INT4;
+        // Signed integer cube maps share the group of the signed integer 2D arrays.
+        case EbtISamplerCube:
+        case EbtISampler2DArray:
+            return HLSL_TEXTURE_2D_ARRAY_INT4;
+        case EbtUSampler2D:
+            return HLSL_TEXTURE_2D_UINT4;
+        case EbtUSampler3D:
+            return HLSL_TEXTURE_3D_UINT4;
+        // Unsigned integer cube maps share the group of the unsigned integer 2D arrays.
+        case EbtUSamplerCube:
+        case EbtUSampler2DArray:
+            return HLSL_TEXTURE_2D_ARRAY_UINT4;
+        case EbtSampler2DShadow:
+            return HLSL_TEXTURE_2D_COMPARISON;
+        case EbtSamplerCubeShadow:
+            return HLSL_TEXTURE_CUBE_COMPARISON;
+        case EbtSampler2DArrayShadow:
+            return HLSL_TEXTURE_2D_ARRAY_COMPARISON;
+        default:
+            UNREACHABLE();
+    }
+    return HLSL_TEXTURE_UNKNOWN;
+}
+
+// Returns the HLSL texture object type name for a texture/sampler group.
+TString TextureString(const HLSLTextureSamplerGroup type)
+{
+    switch (type)
+    {
+        // A comparison group uses the same texture type as its regular counterpart.
+        case HLSL_TEXTURE_2D:
+        case HLSL_TEXTURE_2D_COMPARISON:
+            return "Texture2D";
+        case HLSL_TEXTURE_CUBE:
+        case HLSL_TEXTURE_CUBE_COMPARISON:
+            return "TextureCube";
+        case HLSL_TEXTURE_2D_ARRAY:
+        case HLSL_TEXTURE_2D_ARRAY_COMPARISON:
+            return "Texture2DArray";
+        case HLSL_TEXTURE_3D:
+            return "Texture3D";
+        // Integer groups carry their element type as a template argument.
+        case HLSL_TEXTURE_2D_INT4:
+            return "Texture2D<int4>";
+        case HLSL_TEXTURE_3D_INT4:
+            return "Texture3D<int4>";
+        case HLSL_TEXTURE_2D_ARRAY_INT4:
+            return "Texture2DArray<int4>";
+        case HLSL_TEXTURE_2D_UINT4:
+            return "Texture2D<uint4>";
+        case HLSL_TEXTURE_3D_UINT4:
+            return "Texture3D<uint4>";
+        case HLSL_TEXTURE_2D_ARRAY_UINT4:
+            return "Texture2DArray<uint4>";
+        default:
+            UNREACHABLE();
     }
 
     return "<unknown texture type>";
 }
 
+TString TextureString(const TBasicType type)  // Resolves the group from the ESSL type first
+{
+    return TextureString(TextureGroup(type));
+}
+
+TString TextureGroupSuffix(const HLSLTextureSamplerGroup type)  // Short name tag per group
+{
+    switch (type)
+    {
+        case HLSL_TEXTURE_2D:
+            return "2D";
+        case HLSL_TEXTURE_CUBE:
+            return "Cube";
+        case HLSL_TEXTURE_2D_ARRAY:
+            return "2DArray";
+        case HLSL_TEXTURE_3D:
+            return "3D";
+        case HLSL_TEXTURE_2D_INT4:
+            return "2D_int4_";
+        case HLSL_TEXTURE_3D_INT4:
+            return "3D_int4_";
+        case HLSL_TEXTURE_2D_ARRAY_INT4:
+            return "2DArray_int4_";
+        case HLSL_TEXTURE_2D_UINT4:
+            return "2D_uint4_";
+        case HLSL_TEXTURE_3D_UINT4:
+            return "3D_uint4_";
+        case HLSL_TEXTURE_2D_ARRAY_UINT4:
+            return "2DArray_uint4_";
+        case HLSL_TEXTURE_2D_COMPARISON:
+            return "2D_comparison";
+        case HLSL_TEXTURE_CUBE_COMPARISON:
+            return "Cube_comparison";
+        case HLSL_TEXTURE_2D_ARRAY_COMPARISON:
+            return "2DArray_comparison";
+        default:
+            UNREACHABLE();
+    }
+    // Only reached for a group without a case above, after UNREACHABLE().
+    return "<unknown texture type>";
+}
+
+TString TextureGroupSuffix(const TBasicType type)  // Suffix of the group the type maps to
+{
+    return TextureGroupSuffix(TextureGroup(type));
+}
+
+// Group suffix, except that the integer cube sampler types get suffixes of their own.
+TString TextureTypeSuffix(const TBasicType type)
+{
+    if (type == EbtISamplerCube)
+    {
+        return "Cube_int4_";
+    }
+    if (type == EbtUSamplerCube)
+    {
+        return "Cube_uint4_";
+    }
+    return TextureGroupSuffix(type);  // All other types use their group suffix
+}
+
 TString DecorateUniform(const TString &string, const TType &type)
 {
     if (type.getBasicType() == EbtSamplerExternalOES)
@@ -270,4 +396,43 @@
     return "";
 }
 
+// Returns the number of texture coordinate components for the given ESSL sampler type.
+// Types that are not listed below hit UNREACHABLE() and yield 0.
+int HLSLTextureCoordsCount(const TBasicType samplerType)
+{
+    switch (samplerType)
+    {
+        // 2D textures.
+        case EbtSampler2D:
+        case EbtISampler2D:
+        case EbtUSampler2D:
+        case EbtSampler2DShadow:
+            return 2;
+        // External OES samplers belong to the plain 2D texture group (see TextureGroup()),
+        // so they are addressed with 2D coordinates as well.
+        case EbtSamplerExternalOES:
+            return 2;
+        // 3D textures.
+        case EbtSampler3D:
+        case EbtISampler3D:
+        case EbtUSampler3D:
+            return 3;
+        // Cube maps.
+        case EbtSamplerCube:
+        case EbtISamplerCube:
+        case EbtUSamplerCube:
+        case EbtSamplerCubeShadow:
+            return 3;
+        // 2D array textures.
+        case EbtSampler2DArray:
+        case EbtISampler2DArray:
+        case EbtUSampler2DArray:
+        case EbtSampler2DArrayShadow:
+            return 3;
+        // Not a sampler type handled by this function.
+        default:
+            UNREACHABLE();
+    }
+    return 0;
+}
 }
diff --git a/src/compiler/translator/UtilsHLSL.h b/src/compiler/translator/UtilsHLSL.h
index 40b45f2..42444e3 100644
--- a/src/compiler/translator/UtilsHLSL.h
+++ b/src/compiler/translator/UtilsHLSL.h
@@ -20,8 +20,44 @@
 namespace sh
 {
 
-TString TextureString(const TType &type);
-TString SamplerString(const TType &type);
+// Unique combinations of HLSL Texture type and HLSL Sampler type.
+enum HLSLTextureSamplerGroup
+{
+    // Regular samplers
+    HLSL_TEXTURE_2D,
+    HLSL_TEXTURE_MIN = HLSL_TEXTURE_2D,  // Alias of the first group
+
+    HLSL_TEXTURE_CUBE,
+    HLSL_TEXTURE_2D_ARRAY,
+    HLSL_TEXTURE_3D,
+    HLSL_TEXTURE_2D_INT4,
+    HLSL_TEXTURE_3D_INT4,
+    HLSL_TEXTURE_2D_ARRAY_INT4,
+    HLSL_TEXTURE_2D_UINT4,
+    HLSL_TEXTURE_3D_UINT4,
+    HLSL_TEXTURE_2D_ARRAY_UINT4,
+
+    // Comparison samplers
+    // (SamplerString() returns "SamplerComparisonState" for these groups)
+    HLSL_TEXTURE_2D_COMPARISON,
+    HLSL_TEXTURE_CUBE_COMPARISON,
+    HLSL_TEXTURE_2D_ARRAY_COMPARISON,
+
+    HLSL_COMPARISON_SAMPLER_GROUP_BEGIN = HLSL_TEXTURE_2D_COMPARISON,  // First, inclusive
+    HLSL_COMPARISON_SAMPLER_GROUP_END   = HLSL_TEXTURE_2D_ARRAY_COMPARISON,  // Last, inclusive
+
+    HLSL_TEXTURE_UNKNOWN,  // Returned by TextureGroup() for unhandled types
+    HLSL_TEXTURE_MAX = HLSL_TEXTURE_UNKNOWN
+};
+
+HLSLTextureSamplerGroup TextureGroup(const TBasicType type);
+TString TextureString(const HLSLTextureSamplerGroup type);
+TString TextureString(const TBasicType type);
+TString TextureGroupSuffix(const HLSLTextureSamplerGroup type);
+TString TextureGroupSuffix(const TBasicType type);
+TString TextureTypeSuffix(const TBasicType type);
+TString SamplerString(const TBasicType type);
+TString SamplerString(HLSLTextureSamplerGroup type);
 // Prepends an underscore to avoid naming clashes
 TString Decorate(const TString &string);
 TString DecorateIfNeeded(const TName &name);
@@ -36,7 +72,7 @@
                                   bool useStd140Packing);
 TString InterpolationString(TQualifier qualifier);
 TString QualifierString(TQualifier qualifier);
-
+int HLSLTextureCoordsCount(const TBasicType samplerType);
 }
 
 #endif // COMPILER_TRANSLATOR_UTILSHLSL_H_
diff --git a/src/compiler/translator/blocklayoutHLSL.cpp b/src/compiler/translator/blocklayoutHLSL.cpp
index 2472ca8..4311924 100644
--- a/src/compiler/translator/blocklayoutHLSL.cpp
+++ b/src/compiler/translator/blocklayoutHLSL.cpp
@@ -113,9 +113,14 @@
 {
     switch (outputType)
     {
-      case SH_HLSL9_OUTPUT: return ENCODE_LOOSE;
-      case SH_HLSL11_OUTPUT: return ENCODE_PACKED;
-      default: UNREACHABLE(); return ENCODE_PACKED;
+        case SH_HLSL_3_0_OUTPUT:
+            return ENCODE_LOOSE;
+        case SH_HLSL_4_1_OUTPUT:
+        case SH_HLSL_4_0_FL9_3_OUTPUT:
+            return ENCODE_PACKED;
+        default:
+            UNREACHABLE();
+            return ENCODE_PACKED;
     }
 }