Merge pull request #2387 from BNieuwenhuizen/nonuniform
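NonUniform SPIR-V fixes.
Apply the NonUniformEXT decoration to the access chain used by stores, to constructor results, and to the image extracted (OpImage) from a sampled image.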
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 5b7f89a..1d153c0 100644
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -2308,7 +2308,8 @@
// The result of operation is always stored, but conditionally the
// consumed result. The consumed result is always an r-value.
- builder.accessChainStore(result);
+ builder.accessChainStore(result,
+ TranslateNonUniformDecoration(node->getOperand()->getType().getQualifier()));
builder.clearAccessChain();
if (node->getOp() == glslang::EOpPreIncrement ||
node->getOp() == glslang::EOpPreDecrement)
@@ -2384,6 +2385,7 @@
spv::Id invertedType = spv::NoType; // to use to override the natural type of the node
std::vector<spv::Builder::AccessChain> complexLvalues; // for holding swizzling l-values too complex for
// SPIR-V, for an out parameter
+ std::vector<glslang::TQualifier> complexLValueQualifiers;
std::vector<spv::Id> temporaryLvalues; // temporaries to pass, as proxies for complexLValues
auto resultType = [&invertedType, &node, this](){ return invertedType != spv::NoType ?
@@ -2627,6 +2629,10 @@
else
constructed = builder.createConstructor(precision, arguments, resultType());
+ if (node->getType().getQualifier().isNonUniform()) {
+ builder.addDecoration(constructed, spv::DecorationNonUniformEXT);
+ }
+
builder.clearAccessChain();
builder.setAccessChainRValue(constructed);
@@ -3001,6 +3007,7 @@
// receive the result, and must later swizzle that into the original
// l-value.
complexLvalues.push_back(builder.getAccessChain());
+ complexLValueQualifiers.push_back(glslangOperands[arg]->getAsTyped()->getType().getQualifier());
temporaryLvalues.push_back(builder.createVariable(
spv::NoPrecision, spv::StorageClassFunction,
builder.accessChainGetInferredType(), "swizzleTemp"));
@@ -3105,7 +3112,7 @@
for (unsigned int i = 0; i < temporaryLvalues.size(); ++i) {
builder.setAccessChain(complexLvalues[i]);
- builder.accessChainStore(builder.createLoad(temporaryLvalues[i], spv::NoPrecision));
+ builder.accessChainStore(builder.createLoad(temporaryLvalues[i], spv::NoPrecision), TranslateNonUniformDecoration(complexLValueQualifiers[i]));
}
}
@@ -4170,7 +4177,7 @@
unsigned int alignment = builder.getAccessChain().alignment;
alignment |= type.getBufferReferenceAlignment();
- builder.accessChainStore(rvalue,
+ builder.accessChainStore(rvalue, TranslateNonUniformDecoration(type.getQualifier()),
spv::MemoryAccessMask(TranslateMemoryAccess(coherentFlags) &
~spv::MemoryAccessMakePointerVisibleKHRMask),
TranslateMemoryScope(coherentFlags), alignment);
@@ -4766,12 +4773,15 @@
const bool isUnsignedResult = node->getType().getBasicType() == glslang::EbtUint;
+ if (builder.isSampledImage(params.sampler) &&
+ ((cracked.query && node->getOp() != glslang::EOpTextureQueryLod) || cracked.fragMask || cracked.fetch)) {
+ params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler);
+ if (imageType.getQualifier().isNonUniform()) {
+ builder.addDecoration(params.sampler, spv::DecorationNonUniformEXT);
+ }
+ }
// Check for queries
if (cracked.query) {
- // OpImageQueryLod works on a sampled image, for other queries the image has to be extracted first
- if (node->getOp() != glslang::EOpTextureQueryLod && builder.isSampledImage(params.sampler))
- params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler);
-
switch (node->getOp()) {
case glslang::EOpImageQuerySize:
case glslang::EOpTextureQuerySize:
@@ -5025,10 +5035,6 @@
auto opIt = arguments.begin();
std::vector<spv::Id> operands;
- // Extract the image if necessary
- if (builder.isSampledImage(params.sampler))
- params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler);
-
operands.push_back(params.sampler);
++opIt;
@@ -5089,13 +5095,6 @@
bias = true;
}
- // See if the sampler param should really be just the SPV image part
- if (cracked.fetch) {
- // a fetch needs to have the image extracted first
- if (builder.isSampledImage(params.sampler))
- params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler);
- }
-
#ifndef GLSLANG_WEB
if (cracked.gather) {
const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions();
@@ -5255,7 +5254,7 @@
builder.accessChainPush(builder.makeIntConstant(i), flags, 0);
builder.accessChainStore(builder.createCompositeExtract(res, builder.getContainedTypeId(resType, i+1),
- i+1));
+ i+1), TranslateNonUniformDecoration(imageType.getQualifier()));
}
return builder.createCompositeExtract(res, resultType(), 0);
}
diff --git a/SPIRV/SpvBuilder.cpp b/SPIRV/SpvBuilder.cpp
index 9680331..42896ce 100644
--- a/SPIRV/SpvBuilder.cpp
+++ b/SPIRV/SpvBuilder.cpp
@@ -2761,12 +2761,14 @@
}
// Comments in header
-void Builder::accessChainStore(Id rvalue, spv::MemoryAccessMask memoryAccess, spv::Scope scope, unsigned int alignment)
+void Builder::accessChainStore(Id rvalue, Decoration nonUniform, spv::MemoryAccessMask memoryAccess, spv::Scope scope, unsigned int alignment)
{
assert(accessChain.isRValue == false);
transferAccessChainSwizzle(true);
Id base = collapseAccessChain();
+ addDecoration(base, nonUniform);
+
Id source = rvalue;
// dynamic component should be gone
diff --git a/SPIRV/SpvBuilder.h b/SPIRV/SpvBuilder.h
index 077945e..f93e182 100644
--- a/SPIRV/SpvBuilder.h
+++ b/SPIRV/SpvBuilder.h
@@ -721,7 +721,8 @@
}
// use accessChain and swizzle to store value
- void accessChainStore(Id rvalue, spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone,
+ void accessChainStore(Id rvalue, Decoration nonUniform,
+ spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone,
spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0);
// use accessChain and swizzle to load an r-value
diff --git a/Test/baseResults/spv.nonuniform.frag.out b/Test/baseResults/spv.nonuniform.frag.out
index 8d61619..66f53d5 100644
--- a/Test/baseResults/spv.nonuniform.frag.out
+++ b/Test/baseResults/spv.nonuniform.frag.out
@@ -1,7 +1,7 @@
spv.nonuniform.frag
// Module Version 10000
// Generated by (magic number): 8000a
-// Id's are bound by 212
+// Id's are bound by 235
Capability Shader
Capability InputAttachment
@@ -22,7 +22,7 @@
Extension "SPV_EXT_descriptor_indexing"
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
- EntryPoint Fragment 4 "main" 35 92
+ EntryPoint Fragment 4 "main" 35 92 182
ExecutionMode 4 OriginUpperLeft
Source GLSL 450
SourceExtension "GL_EXT_nonuniform_qualifier"
@@ -53,20 +53,26 @@
Name 139 "inputAttachment"
Name 149 "uniformTexelBuffer"
Name 160 "storageTexelBuffer"
- Name 170 "v"
- Name 185 "uv"
- Name 195 "m"
- Name 203 "S"
- MemberName 203(S) 0 "a"
- Name 205 "s"
+ Name 171 "uniformTexArr"
+ Name 178 "uniformSampler"
+ Name 182 "inTexcoord"
+ Name 190 "v"
+ Name 205 "uv"
+ Name 215 "m"
+ Name 223 "S"
+ MemberName 223(S) 0 "a"
+ Name 225 "s"
Decorate 9(nupi) DecorationNonUniformEXT
Decorate 13 DecorationNonUniformEXT
Decorate 17(nu_li) DecorationNonUniformEXT
Decorate 17(nu_li) DecorationNonUniformEXT
Decorate 19 DecorationNonUniformEXT
+ Decorate 18(param) DecorationNonUniformEXT
+ Decorate 17(nu_li) DecorationNonUniformEXT
Decorate 24 DecorationNonUniformEXT
Decorate 28 DecorationNonUniformEXT
Decorate 29 DecorationNonUniformEXT
+ Decorate 17(nu_li) DecorationNonUniformEXT
Decorate 35(nu_inv4) Location 0
Decorate 35(nu_inv4) DecorationNonUniformEXT
Decorate 39 DecorationNonUniformEXT
@@ -126,35 +132,48 @@
Decorate 150 DecorationNonUniformEXT
Decorate 151 DecorationNonUniformEXT
Decorate 152 DecorationNonUniformEXT
+ Decorate 153 DecorationNonUniformEXT
Decorate 160(storageTexelBuffer) DescriptorSet 0
Decorate 160(storageTexelBuffer) Binding 9
Decorate 92(nu_ii) DecorationNonUniformEXT
Decorate 161 DecorationNonUniformEXT
Decorate 162 DecorationNonUniformEXT
Decorate 163 DecorationNonUniformEXT
- Decorate 170(v) DecorationNonUniformEXT
+ Decorate 171(uniformTexArr) DescriptorSet 0
+ Decorate 171(uniformTexArr) Binding 10
+ Decorate 92(nu_ii) DecorationNonUniformEXT
Decorate 172 DecorationNonUniformEXT
- Decorate 173 DecorationNonUniformEXT
Decorate 174 DecorationNonUniformEXT
Decorate 175 DecorationNonUniformEXT
- Decorate 179 DecorationNonUniformEXT
- Decorate 180 DecorationNonUniformEXT
- Decorate 181 DecorationNonUniformEXT
- Decorate 182 DecorationNonUniformEXT
+ Decorate 178(uniformSampler) DescriptorSet 0
+ Decorate 178(uniformSampler) Binding 11
+ Decorate 182(inTexcoord) Location 2
+ Decorate 190(v) DecorationNonUniformEXT
+ Decorate 192 DecorationNonUniformEXT
+ Decorate 193 DecorationNonUniformEXT
+ Decorate 194 DecorationNonUniformEXT
+ Decorate 195 DecorationNonUniformEXT
+ Decorate 199 DecorationNonUniformEXT
+ Decorate 200 DecorationNonUniformEXT
+ Decorate 201 DecorationNonUniformEXT
+ Decorate 202 DecorationNonUniformEXT
Decorate 92(nu_ii) DecorationNonUniformEXT
- Decorate 186 DecorationNonUniformEXT
- Decorate 187 DecorationNonUniformEXT
- Decorate 188 DecorationNonUniformEXT
- Decorate 189 DecorationNonUniformEXT
- Decorate 190 DecorationNonUniformEXT
- Decorate 195(m) DecorationNonUniformEXT
- Decorate 196 DecorationNonUniformEXT
- Decorate 197 DecorationNonUniformEXT
- Decorate 205(s) DecorationNonUniformEXT
Decorate 206 DecorationNonUniformEXT
Decorate 207 DecorationNonUniformEXT
Decorate 208 DecorationNonUniformEXT
Decorate 209 DecorationNonUniformEXT
+ Decorate 210 DecorationNonUniformEXT
+ Decorate 215(m) DecorationNonUniformEXT
+ Decorate 216 DecorationNonUniformEXT
+ Decorate 217 DecorationNonUniformEXT
+ Decorate 225(s) DecorationNonUniformEXT
+ Decorate 226 DecorationNonUniformEXT
+ Decorate 227 DecorationNonUniformEXT
+ Decorate 228 DecorationNonUniformEXT
+ Decorate 229 DecorationNonUniformEXT
+ Decorate 92(nu_ii) DecorationNonUniformEXT
+ Decorate 232 DecorationNonUniformEXT
+ Decorate 234 DecorationNonUniformEXT
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 1
@@ -224,14 +243,24 @@
158: TypeRuntimeArray 75
159: TypePointer UniformConstant 158
160(storageTexelBuffer): 159(ptr) Variable UniformConstant
- 168: TypeVector 6(int) 4
- 169: TypePointer Function 168(ivec4)
- 171: 36(int) Constant 1
- 178: 36(int) Constant 2
- 193: TypeMatrix 33(fvec4) 4
- 194: TypePointer Function 193
- 203(S): TypeStruct 6(int)
- 204: TypePointer Function 203(S)
+ 168: 36(int) Constant 8
+ 169: TypeArray 108 168
+ 170: TypePointer UniformConstant 169
+171(uniformTexArr): 170(ptr) Variable UniformConstant
+ 173: TypePointer UniformConstant 108
+ 176: TypeSampler
+ 177: TypePointer UniformConstant 176
+178(uniformSampler): 177(ptr) Variable UniformConstant
+ 181: TypePointer Input 117(fvec2)
+ 182(inTexcoord): 181(ptr) Variable Input
+ 188: TypeVector 6(int) 4
+ 189: TypePointer Function 188(ivec4)
+ 191: 36(int) Constant 1
+ 198: 36(int) Constant 2
+ 213: TypeMatrix 33(fvec4) 4
+ 214: TypePointer Function 213
+ 223(S): TypeStruct 6(int)
+ 224: TypePointer Function 223(S)
4(main): 2 Function None 3
5: Label
16(a): 7(ptr) Variable Function
@@ -241,10 +270,10 @@
32(b): 31(ptr) Variable Function
41(nu_gf): 31(ptr) Variable Function
48(dyn_i): 7(ptr) Variable Function
- 170(v): 169(ptr) Variable Function
- 185(uv): 169(ptr) Variable Function
- 195(m): 194(ptr) Variable Function
- 205(s): 204(ptr) Variable Function
+ 190(v): 189(ptr) Variable Function
+ 205(uv): 189(ptr) Variable Function
+ 215(m): 214(ptr) Variable Function
+ 225(s): 224(ptr) Variable Function
19: 6(int) Load 17(nu_li)
Store 18(param) 19
21: 6(int) FunctionCall 11(foo(i1;i1;) 18(param) 20(param)
@@ -341,43 +370,58 @@
166: 30(float) Load 32(b)
167: 30(float) FAdd 166 165
Store 32(b) 167
- 172: 7(ptr) AccessChain 170(v) 171
- 173: 6(int) Load 172
- 174: 94(ptr) AccessChain 90(uniformBuffer) 173 53
- 175: 30(float) Load 174
- 176: 30(float) Load 32(b)
- 177: 30(float) FAdd 176 175
- Store 32(b) 177
- 179: 7(ptr) AccessChain 170(v) 178
- 180: 6(int) Load 179
- 181: 94(ptr) AccessChain 90(uniformBuffer) 180 53
- 182: 30(float) Load 181
- 183: 30(float) Load 32(b)
- 184: 30(float) FAdd 183 182
- Store 32(b) 184
- 186: 6(int) Load 92(nu_ii)
- 187: 7(ptr) AccessChain 185(uv) 186
- 188: 6(int) Load 187
- 189: 94(ptr) AccessChain 90(uniformBuffer) 188 53
- 190: 30(float) Load 189
- 191: 30(float) Load 32(b)
- 192: 30(float) FAdd 191 190
- Store 32(b) 192
- 196: 31(ptr) AccessChain 195(m) 26 178
- 197: 30(float) Load 196
- 198: 6(int) ConvertFToS 197
- 199: 94(ptr) AccessChain 90(uniformBuffer) 198 53
- 200: 30(float) Load 199
- 201: 30(float) Load 32(b)
- 202: 30(float) FAdd 201 200
- Store 32(b) 202
- 206: 7(ptr) AccessChain 205(s) 53
- 207: 6(int) Load 206
- 208: 94(ptr) AccessChain 90(uniformBuffer) 207 53
- 209: 30(float) Load 208
- 210: 30(float) Load 32(b)
- 211: 30(float) FAdd 210 209
- Store 32(b) 211
+ 172: 6(int) Load 92(nu_ii)
+ 174: 173(ptr) AccessChain 171(uniformTexArr) 172
+ 175: 108 Load 174
+ 179: 176 Load 178(uniformSampler)
+ 180: 109 SampledImage 175 179
+ 183: 117(fvec2) Load 182(inTexcoord)
+ 184: 33(fvec4) ImageSampleImplicitLod 180 183
+ 185: 30(float) CompositeExtract 184 0
+ 186: 30(float) Load 32(b)
+ 187: 30(float) FAdd 186 185
+ Store 32(b) 187
+ 192: 7(ptr) AccessChain 190(v) 191
+ 193: 6(int) Load 192
+ 194: 94(ptr) AccessChain 90(uniformBuffer) 193 53
+ 195: 30(float) Load 194
+ 196: 30(float) Load 32(b)
+ 197: 30(float) FAdd 196 195
+ Store 32(b) 197
+ 199: 7(ptr) AccessChain 190(v) 198
+ 200: 6(int) Load 199
+ 201: 94(ptr) AccessChain 90(uniformBuffer) 200 53
+ 202: 30(float) Load 201
+ 203: 30(float) Load 32(b)
+ 204: 30(float) FAdd 203 202
+ Store 32(b) 204
+ 206: 6(int) Load 92(nu_ii)
+ 207: 7(ptr) AccessChain 205(uv) 206
+ 208: 6(int) Load 207
+ 209: 94(ptr) AccessChain 90(uniformBuffer) 208 53
+ 210: 30(float) Load 209
+ 211: 30(float) Load 32(b)
+ 212: 30(float) FAdd 211 210
+ Store 32(b) 212
+ 216: 31(ptr) AccessChain 215(m) 26 198
+ 217: 30(float) Load 216
+ 218: 6(int) ConvertFToS 217
+ 219: 94(ptr) AccessChain 90(uniformBuffer) 218 53
+ 220: 30(float) Load 219
+ 221: 30(float) Load 32(b)
+ 222: 30(float) FAdd 221 220
+ Store 32(b) 222
+ 226: 7(ptr) AccessChain 225(s) 53
+ 227: 6(int) Load 226
+ 228: 94(ptr) AccessChain 90(uniformBuffer) 227 53
+ 229: 30(float) Load 228
+ 230: 30(float) Load 32(b)
+ 231: 30(float) FAdd 230 229
+ Store 32(b) 231
+ 232: 6(int) Load 92(nu_ii)
+ 233: 30(float) Load 32(b)
+ 234: 94(ptr) AccessChain 102(storageBuffer) 232 53
+ Store 234 233
Return
FunctionEnd
11(foo(i1;i1;): 6(int) Function None 8
diff --git a/Test/spv.nonuniform.frag b/Test/spv.nonuniform.frag
index d3b05a5..c136d25 100644
--- a/Test/spv.nonuniform.frag
+++ b/Test/spv.nonuniform.frag
@@ -5,6 +5,7 @@
layout(location=0) nonuniformEXT in vec4 nu_inv4;
nonuniformEXT float nu_gf;
layout(location=1) in nonuniformEXT flat int nu_ii;
+layout(location = 2) in vec2 inTexcoord;
layout(binding=0, input_attachment_index = 0) uniform subpassInput inputAttachmentDyn[];
layout(binding=1) uniform samplerBuffer uniformTexelBufferDyn[];
@@ -16,6 +17,8 @@
layout(binding=7, input_attachment_index = 1) uniform subpassInput inputAttachment[];
layout(binding=8) uniform samplerBuffer uniformTexelBuffer[];
layout(binding=9, r32f) uniform imageBuffer storageTexelBuffer[];
+layout(binding = 10) uniform texture2D uniformTexArr[8];
+layout(binding = 11) uniform sampler uniformSampler;
nonuniformEXT int foo(nonuniformEXT int nupi, nonuniformEXT out int f)
{
@@ -42,6 +45,7 @@
b += subpassLoad(inputAttachment[nu_ii]).x;
b += texelFetch(uniformTexelBuffer[nu_ii], 1).x;
b += imageLoad(storageTexelBuffer[nu_ii], 1).x;
+ b += texture(sampler2D(uniformTexArr[nu_ii], uniformSampler), inTexcoord.xy).x;
nonuniformEXT ivec4 v;
nonuniformEXT mat4 m;
@@ -52,4 +56,6 @@
b += uniformBuffer[uv[nu_ii]].a;
b += uniformBuffer[int(m[2].z)].a;
b += uniformBuffer[s.a].a;
+
+ storageBuffer[nu_ii].b = b;
}