HLSL: Add conversions for image ops during SPV construction

HLSL allows image and texture types to be templatized on sub-vec4 types,
or even structures.  This was mostly handled already during creation of
sampling operations.  However, it wasn't happening for operator[],
which can generate image loads.

It also isn't very easy to do at that point, because operator[] does
not know where the results it produces will end up.  They may be used
as an lvalue or an rvalue (a post-process converts loads to stores),
or they may end up in atomic ops.

To bypass that difficulty, GlslangToSpv now looks for this case and
adds the appropriate conversion.  LIMITATION: this works only in
cases where a simple conversion opcode suffices.  That is to say,
it will not work if the type is templatized on a struct.
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 6e9fb38..dc644e5 100755
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -777,7 +777,7 @@
         control = control | spv::LoopControlDontUnrollMask;
     if (loopNode.getUnroll())
         control = control | spv::LoopControlUnrollMask;
-    if (loopNode.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite)
+    if (unsigned(loopNode.getLoopDependency()) == glslang::TIntermLoop::dependencyInfinite)
         control = control | spv::LoopControlDependencyInfiniteMask;
     else if (loopNode.getLoopDependency() > 0) {
         control = control | spv::LoopControlDependencyLengthMask;
@@ -3229,8 +3229,6 @@
 
     builder.setLine(node->getLoc().line);
 
-    auto resultType = [&node,this]{ return convertGlslangToSpvType(node->getType()); };
-
     // Process a GLSL texturing op (will be SPV image)
     const glslang::TSampler sampler = node->getAsAggregate() ? node->getAsAggregate()->getSequence()[0]->getAsTyped()->getType().getSampler()
                                                              : node->getAsUnaryNode()->getOperand()->getAsTyped()->getType().getSampler();
@@ -3279,6 +3277,20 @@
         }
     }
 
+    int components = node->getType().getVectorSize();
+
+    if (node->getOp() == glslang::EOpTextureFetch) {
+        // These must produce 4 components, per SPIR-V spec.  We'll add a conversion constructor if needed.
+        // This will only happen through the HLSL path for operator[], so we do not have to handle e.g.
+        // the EOpTexture/Proj/Lod/etc family.  It would be harmless to do so, but would need more logic
+        // here around e.g. which ones return scalars or other types.
+        components = 4;
+    }
+
+    glslang::TType returnType(node->getType().getBasicType(), glslang::EvqTemporary, components);
+
+    auto resultType = [&returnType,this]{ return convertGlslangToSpvType(returnType); };
+
     // Check for image functions other than queries
     if (node->isImage()) {
         std::vector<spv::Id> operands;
@@ -3325,9 +3337,14 @@
             if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
                 builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
 
-            spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands);
-            builder.setPrecision(result, precision);
-            return result;
+            std::vector<spv::Id> result = { builder.createOp(spv::OpImageRead, resultType(), operands) };
+            builder.setPrecision(result[0], precision);
+
+            // If needed, add a conversion constructor to the proper size.
+            if (components != node->getType().getVectorSize())
+                result[0] = builder.createConstructor(precision, result, convertGlslangToSpvType(node->getType()));
+
+            return result[0];
 #ifdef AMD_EXTENSIONS
         } else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
 #else
@@ -3601,7 +3618,14 @@
         }
     }
 
-    return builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params);
+    std::vector<spv::Id> result = { 
+        builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params)
+    };
+
+    if (components != node->getType().getVectorSize())
+        result[0] = builder.createConstructor(precision, result, convertGlslangToSpvType(node->getType()));
+
+    return result[0];
 }
 
 spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node)
diff --git a/Test/baseResults/hlsl.imagefetch-subvec4.comp.out b/Test/baseResults/hlsl.imagefetch-subvec4.comp.out
new file mode 100644
index 0000000..c453223
--- /dev/null
+++ b/Test/baseResults/hlsl.imagefetch-subvec4.comp.out
@@ -0,0 +1,142 @@
+hlsl.imagefetch-subvec4.comp
+Shader version: 500
+local_size = (8, 8, 8)
+0:? Sequence
+0:6  Function Definition: @main(vu3; ( temp void)
+0:6    Function Parameters: 
+0:6      'tid' ( in 3-component vector of uint)
+0:?     Sequence
+0:7      Sequence
+0:7        move second child to first child ( temp uint)
+0:7          'storeTemp' ( temp uint)
+0:7          Convert int to uint ( temp uint)
+0:7            textureFetch ( temp int)
+0:7              'IN' (layout( binding=0) uniform itexture3D)
+0:7              'tid' ( in 3-component vector of uint)
+0:7              Constant:
+0:7                0 (const int)
+0:7        imageStore ( temp void)
+0:7          'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:7          'tid' ( in 3-component vector of uint)
+0:7          'storeTemp' ( temp uint)
+0:7        'storeTemp' ( temp uint)
+0:6  Function Definition: main( ( temp void)
+0:6    Function Parameters: 
+0:?     Sequence
+0:6      move second child to first child ( temp 3-component vector of uint)
+0:?         'tid' ( temp 3-component vector of uint)
+0:?         'tid' ( in 3-component vector of uint GlobalInvocationID)
+0:6      Function Call: @main(vu3; ( temp void)
+0:?         'tid' ( temp 3-component vector of uint)
+0:?   Linker Objects
+0:?     'IN' (layout( binding=0) uniform itexture3D)
+0:?     'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:?     'tid' ( in 3-component vector of uint GlobalInvocationID)
+
+
+Linked compute stage:
+
+
+Shader version: 500
+local_size = (8, 8, 8)
+0:? Sequence
+0:6  Function Definition: @main(vu3; ( temp void)
+0:6    Function Parameters: 
+0:6      'tid' ( in 3-component vector of uint)
+0:?     Sequence
+0:7      Sequence
+0:7        move second child to first child ( temp uint)
+0:7          'storeTemp' ( temp uint)
+0:7          Convert int to uint ( temp uint)
+0:7            textureFetch ( temp int)
+0:7              'IN' (layout( binding=0) uniform itexture3D)
+0:7              'tid' ( in 3-component vector of uint)
+0:7              Constant:
+0:7                0 (const int)
+0:7        imageStore ( temp void)
+0:7          'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:7          'tid' ( in 3-component vector of uint)
+0:7          'storeTemp' ( temp uint)
+0:7        'storeTemp' ( temp uint)
+0:6  Function Definition: main( ( temp void)
+0:6    Function Parameters: 
+0:?     Sequence
+0:6      move second child to first child ( temp 3-component vector of uint)
+0:?         'tid' ( temp 3-component vector of uint)
+0:?         'tid' ( in 3-component vector of uint GlobalInvocationID)
+0:6      Function Call: @main(vu3; ( temp void)
+0:?         'tid' ( temp 3-component vector of uint)
+0:?   Linker Objects
+0:?     'IN' (layout( binding=0) uniform itexture3D)
+0:?     'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:?     'tid' ( in 3-component vector of uint GlobalInvocationID)
+
+// Module Version 10000
+// Generated by (magic number): 80004
+// Id's are bound by 39
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 34
+                              ExecutionMode 4 LocalSize 8 8 8
+                              Source HLSL 500
+                              Name 4  "main"
+                              Name 11  "@main(vu3;"
+                              Name 10  "tid"
+                              Name 14  "storeTemp"
+                              Name 18  "IN"
+                              Name 28  "OUT"
+                              Name 32  "tid"
+                              Name 34  "tid"
+                              Name 36  "param"
+                              Decorate 18(IN) DescriptorSet 0
+                              Decorate 18(IN) Binding 0
+                              Decorate 28(OUT) DescriptorSet 0
+                              Decorate 28(OUT) Binding 1
+                              Decorate 34(tid) BuiltIn GlobalInvocationId
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypeVector 6(int) 3
+               8:             TypePointer Function 7(ivec3)
+               9:             TypeFunction 2 8(ptr)
+              13:             TypePointer Function 6(int)
+              15:             TypeInt 32 1
+              16:             TypeImage 15(int) 3D sampled format:Unknown
+              17:             TypePointer UniformConstant 16
+          18(IN):     17(ptr) Variable UniformConstant
+              21:     15(int) Constant 0
+              22:             TypeVector 15(int) 4
+              26:             TypeImage 6(int) 3D nonsampled format:R32ui
+              27:             TypePointer UniformConstant 26
+         28(OUT):     27(ptr) Variable UniformConstant
+              33:             TypePointer Input 7(ivec3)
+         34(tid):     33(ptr) Variable Input
+         4(main):           2 Function None 3
+               5:             Label
+         32(tid):      8(ptr) Variable Function
+       36(param):      8(ptr) Variable Function
+              35:    7(ivec3) Load 34(tid)
+                              Store 32(tid) 35
+              37:    7(ivec3) Load 32(tid)
+                              Store 36(param) 37
+              38:           2 FunctionCall 11(@main(vu3;) 36(param)
+                              Return
+                              FunctionEnd
+  11(@main(vu3;):           2 Function None 9
+         10(tid):      8(ptr) FunctionParameter
+              12:             Label
+   14(storeTemp):     13(ptr) Variable Function
+              19:          16 Load 18(IN)
+              20:    7(ivec3) Load 10(tid)
+              23:   22(ivec4) ImageFetch 19 20 Lod 21
+              24:     15(int) CompositeExtract 23 0
+              25:      6(int) Bitcast 24
+                              Store 14(storeTemp) 25
+              29:          26 Load 28(OUT)
+              30:    7(ivec3) Load 10(tid)
+              31:      6(int) Load 14(storeTemp)
+                              ImageWrite 29 30 31
+                              Return
+                              FunctionEnd
diff --git a/Test/hlsl.imagefetch-subvec4.comp b/Test/hlsl.imagefetch-subvec4.comp
new file mode 100644
index 0000000..2a83dd2
--- /dev/null
+++ b/Test/hlsl.imagefetch-subvec4.comp
@@ -0,0 +1,8 @@
+Texture3D<int> IN: register(t0);
+RWTexture3D<uint> OUT: register(u1);
+
+[numthreads(8,8,8)]
+void main(uint3 tid: SV_DispatchThreadID)
+{
+    OUT[tid] = IN[tid];
+}
diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp
index 42d3061..173d42c 100644
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -189,6 +189,7 @@
         {"hlsl.hull.ctrlpt-2.tesc", "main"},
         {"hlsl.identifier.sample.frag", "main"},
         {"hlsl.if.frag", "PixelShaderFunction"},
+        {"hlsl.imagefetch-subvec4.comp", "main"},
         {"hlsl.implicitBool.frag", "main"},
         {"hlsl.inf.vert", "main"},
         {"hlsl.inoutquals.frag", "main"},