HLSL: Add conversions for image ops during SPV construction
HLSL allows image and texture types to be templatized on sub-vec4 types,
or even structures. This was mostly handled already during creation of
sampling operations. However, for operator[], which can generate image
loads, this wasn't happening.
It also isn't very easy to do at the point where operator[] is processed,
because operator[] does not know where the results it produces will end
up. The results may be used as an lvalue or an rvalue, and there's a
post-process to convert loads to stores. They may also end up in atomic ops.
To bypass that difficulty, GlslangToSpv now looks for this case and
adds the appropriate conversion. LIMITATION: this only works for
cases where a simple conversion opcode suffices. That is to say,
it will not work if the type is templatized on a struct.
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 6e9fb38..dc644e5 100755
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -777,7 +777,7 @@
control = control | spv::LoopControlDontUnrollMask;
if (loopNode.getUnroll())
control = control | spv::LoopControlUnrollMask;
- if (loopNode.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite)
+ if (unsigned(loopNode.getLoopDependency()) == glslang::TIntermLoop::dependencyInfinite)
control = control | spv::LoopControlDependencyInfiniteMask;
else if (loopNode.getLoopDependency() > 0) {
control = control | spv::LoopControlDependencyLengthMask;
@@ -3229,8 +3229,6 @@
builder.setLine(node->getLoc().line);
- auto resultType = [&node,this]{ return convertGlslangToSpvType(node->getType()); };
-
// Process a GLSL texturing op (will be SPV image)
const glslang::TSampler sampler = node->getAsAggregate() ? node->getAsAggregate()->getSequence()[0]->getAsTyped()->getType().getSampler()
: node->getAsUnaryNode()->getOperand()->getAsTyped()->getType().getSampler();
@@ -3279,6 +3277,20 @@
}
}
+ int components = node->getType().getVectorSize();
+
+ if (node->getOp() == glslang::EOpTextureFetch) {
+ // These must produce 4 components, per SPIR-V spec. We'll add a conversion constructor if needed.
+ // This will only happen through the HLSL path for operator[], so we do not have to handle e.g.
+ // the EOpTexture/Proj/Lod/etc family. It would be harmless to do so, but would need more logic
+ // here around e.g. which ones return scalars or other types.
+ components = 4;
+ }
+
+ glslang::TType returnType(node->getType().getBasicType(), glslang::EvqTemporary, components);
+
+ auto resultType = [&returnType,this]{ return convertGlslangToSpvType(returnType); };
+
// Check for image functions other than queries
if (node->isImage()) {
std::vector<spv::Id> operands;
@@ -3325,9 +3337,14 @@
if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
- spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands);
- builder.setPrecision(result, precision);
- return result;
+ std::vector<spv::Id> result = { builder.createOp(spv::OpImageRead, resultType(), operands) };
+ builder.setPrecision(result[0], precision);
+
+ // If needed, add a conversion constructor to the proper size.
+ if (components != node->getType().getVectorSize())
+ result[0] = builder.createConstructor(precision, result, convertGlslangToSpvType(node->getType()));
+
+ return result[0];
#ifdef AMD_EXTENSIONS
} else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
#else
@@ -3601,7 +3618,14 @@
}
}
- return builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params);
+ std::vector<spv::Id> result = {
+ builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params)
+ };
+
+ if (components != node->getType().getVectorSize())
+ result[0] = builder.createConstructor(precision, result, convertGlslangToSpvType(node->getType()));
+
+ return result[0];
}
spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node)
diff --git a/Test/baseResults/hlsl.imagefetch-subvec4.comp.out b/Test/baseResults/hlsl.imagefetch-subvec4.comp.out
new file mode 100644
index 0000000..c453223
--- /dev/null
+++ b/Test/baseResults/hlsl.imagefetch-subvec4.comp.out
@@ -0,0 +1,142 @@
+hlsl.imagefetch-subvec4.comp
+Shader version: 500
+local_size = (8, 8, 8)
+0:? Sequence
+0:6 Function Definition: @main(vu3; ( temp void)
+0:6 Function Parameters:
+0:6 'tid' ( in 3-component vector of uint)
+0:? Sequence
+0:7 Sequence
+0:7 move second child to first child ( temp uint)
+0:7 'storeTemp' ( temp uint)
+0:7 Convert int to uint ( temp uint)
+0:7 textureFetch ( temp int)
+0:7 'IN' (layout( binding=0) uniform itexture3D)
+0:7 'tid' ( in 3-component vector of uint)
+0:7 Constant:
+0:7 0 (const int)
+0:7 imageStore ( temp void)
+0:7 'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:7 'tid' ( in 3-component vector of uint)
+0:7 'storeTemp' ( temp uint)
+0:7 'storeTemp' ( temp uint)
+0:6 Function Definition: main( ( temp void)
+0:6 Function Parameters:
+0:? Sequence
+0:6 move second child to first child ( temp 3-component vector of uint)
+0:? 'tid' ( temp 3-component vector of uint)
+0:? 'tid' ( in 3-component vector of uint GlobalInvocationID)
+0:6 Function Call: @main(vu3; ( temp void)
+0:? 'tid' ( temp 3-component vector of uint)
+0:? Linker Objects
+0:? 'IN' (layout( binding=0) uniform itexture3D)
+0:? 'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:? 'tid' ( in 3-component vector of uint GlobalInvocationID)
+
+
+Linked compute stage:
+
+
+Shader version: 500
+local_size = (8, 8, 8)
+0:? Sequence
+0:6 Function Definition: @main(vu3; ( temp void)
+0:6 Function Parameters:
+0:6 'tid' ( in 3-component vector of uint)
+0:? Sequence
+0:7 Sequence
+0:7 move second child to first child ( temp uint)
+0:7 'storeTemp' ( temp uint)
+0:7 Convert int to uint ( temp uint)
+0:7 textureFetch ( temp int)
+0:7 'IN' (layout( binding=0) uniform itexture3D)
+0:7 'tid' ( in 3-component vector of uint)
+0:7 Constant:
+0:7 0 (const int)
+0:7 imageStore ( temp void)
+0:7 'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:7 'tid' ( in 3-component vector of uint)
+0:7 'storeTemp' ( temp uint)
+0:7 'storeTemp' ( temp uint)
+0:6 Function Definition: main( ( temp void)
+0:6 Function Parameters:
+0:? Sequence
+0:6 move second child to first child ( temp 3-component vector of uint)
+0:? 'tid' ( temp 3-component vector of uint)
+0:? 'tid' ( in 3-component vector of uint GlobalInvocationID)
+0:6 Function Call: @main(vu3; ( temp void)
+0:? 'tid' ( temp 3-component vector of uint)
+0:? Linker Objects
+0:? 'IN' (layout( binding=0) uniform itexture3D)
+0:? 'OUT' (layout( binding=1 r32ui) uniform uimage3D)
+0:? 'tid' ( in 3-component vector of uint GlobalInvocationID)
+
+// Module Version 10000
+// Generated by (magic number): 80004
+// Id's are bound by 39
+
+ Capability Shader
+ 1: ExtInstImport "GLSL.std.450"
+ MemoryModel Logical GLSL450
+ EntryPoint GLCompute 4 "main" 34
+ ExecutionMode 4 LocalSize 8 8 8
+ Source HLSL 500
+ Name 4 "main"
+ Name 11 "@main(vu3;"
+ Name 10 "tid"
+ Name 14 "storeTemp"
+ Name 18 "IN"
+ Name 28 "OUT"
+ Name 32 "tid"
+ Name 34 "tid"
+ Name 36 "param"
+ Decorate 18(IN) DescriptorSet 0
+ Decorate 18(IN) Binding 0
+ Decorate 28(OUT) DescriptorSet 0
+ Decorate 28(OUT) Binding 1
+ Decorate 34(tid) BuiltIn GlobalInvocationId
+ 2: TypeVoid
+ 3: TypeFunction 2
+ 6: TypeInt 32 0
+ 7: TypeVector 6(int) 3
+ 8: TypePointer Function 7(ivec3)
+ 9: TypeFunction 2 8(ptr)
+ 13: TypePointer Function 6(int)
+ 15: TypeInt 32 1
+ 16: TypeImage 15(int) 3D sampled format:Unknown
+ 17: TypePointer UniformConstant 16
+ 18(IN): 17(ptr) Variable UniformConstant
+ 21: 15(int) Constant 0
+ 22: TypeVector 15(int) 4
+ 26: TypeImage 6(int) 3D nonsampled format:R32ui
+ 27: TypePointer UniformConstant 26
+ 28(OUT): 27(ptr) Variable UniformConstant
+ 33: TypePointer Input 7(ivec3)
+ 34(tid): 33(ptr) Variable Input
+ 4(main): 2 Function None 3
+ 5: Label
+ 32(tid): 8(ptr) Variable Function
+ 36(param): 8(ptr) Variable Function
+ 35: 7(ivec3) Load 34(tid)
+ Store 32(tid) 35
+ 37: 7(ivec3) Load 32(tid)
+ Store 36(param) 37
+ 38: 2 FunctionCall 11(@main(vu3;) 36(param)
+ Return
+ FunctionEnd
+ 11(@main(vu3;): 2 Function None 9
+ 10(tid): 8(ptr) FunctionParameter
+ 12: Label
+ 14(storeTemp): 13(ptr) Variable Function
+ 19: 16 Load 18(IN)
+ 20: 7(ivec3) Load 10(tid)
+ 23: 22(ivec4) ImageFetch 19 20 Lod 21
+ 24: 15(int) CompositeExtract 23 0
+ 25: 6(int) Bitcast 24
+ Store 14(storeTemp) 25
+ 29: 26 Load 28(OUT)
+ 30: 7(ivec3) Load 10(tid)
+ 31: 6(int) Load 14(storeTemp)
+ ImageWrite 29 30 31
+ Return
+ FunctionEnd
diff --git a/Test/hlsl.imagefetch-subvec4.comp b/Test/hlsl.imagefetch-subvec4.comp
new file mode 100644
index 0000000..2a83dd2
--- /dev/null
+++ b/Test/hlsl.imagefetch-subvec4.comp
@@ -0,0 +1,8 @@
+Texture3D<int> IN: register(t0);
+RWTexture3D<uint> OUT: register(u1);
+
+[numthreads(8,8,8)]
+void main(uint3 tid: SV_DispatchThreadID)
+{
+ OUT[tid] = IN[tid];
+}
diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp
index 42d3061..173d42c 100644
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -189,6 +189,7 @@
{"hlsl.hull.ctrlpt-2.tesc", "main"},
{"hlsl.identifier.sample.frag", "main"},
{"hlsl.if.frag", "PixelShaderFunction"},
+ {"hlsl.imagefetch-subvec4.comp", "main"},
{"hlsl.implicitBool.frag", "main"},
{"hlsl.inf.vert", "main"},
{"hlsl.inoutquals.frag", "main"},