Implement GL_NV_shader_subgroup_partitioned
diff --git a/SPIRV/GLSL.ext.NV.h b/SPIRV/GLSL.ext.NV.h
index c01858b..148d4b4 100644
--- a/SPIRV/GLSL.ext.NV.h
+++ b/SPIRV/GLSL.ext.NV.h
@@ -51,4 +51,7 @@
//SPV_NVX_multiview_per_view_attributes
const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes";
+//SPV_NV_shader_subgroup_partitioned
+const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup_partitioned";
+
#endif // #ifndef GLSLextNV_H
\ No newline at end of file
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
old mode 100644
new mode 100755
index d39c260..74cb9e9
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -4666,7 +4666,13 @@
libCall = spv::CubeFaceCoordAMD;
break;
#endif
-
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartition:
+ builder.addExtension(spv::E_SPV_NV_shader_subgroup_partitioned);
+ builder.addCapability(spv::CapabilityGroupNonUniformPartitionedNV);
+ unaryOp = spv::OpGroupNonUniformPartitionNV;
+ break;
+#endif
default:
return 0;
}
@@ -5629,6 +5635,32 @@
builder.addCapability(spv::CapabilityGroupNonUniform);
builder.addCapability(spv::CapabilityGroupNonUniformQuad);
break;
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedAdd:
+ case glslang::EOpSubgroupPartitionedMul:
+ case glslang::EOpSubgroupPartitionedMin:
+ case glslang::EOpSubgroupPartitionedMax:
+ case glslang::EOpSubgroupPartitionedAnd:
+ case glslang::EOpSubgroupPartitionedOr:
+ case glslang::EOpSubgroupPartitionedXor:
+ case glslang::EOpSubgroupPartitionedInclusiveAdd:
+ case glslang::EOpSubgroupPartitionedInclusiveMul:
+ case glslang::EOpSubgroupPartitionedInclusiveMin:
+ case glslang::EOpSubgroupPartitionedInclusiveMax:
+ case glslang::EOpSubgroupPartitionedInclusiveAnd:
+ case glslang::EOpSubgroupPartitionedInclusiveOr:
+ case glslang::EOpSubgroupPartitionedInclusiveXor:
+ case glslang::EOpSubgroupPartitionedExclusiveAdd:
+ case glslang::EOpSubgroupPartitionedExclusiveMul:
+ case glslang::EOpSubgroupPartitionedExclusiveMin:
+ case glslang::EOpSubgroupPartitionedExclusiveMax:
+ case glslang::EOpSubgroupPartitionedExclusiveAnd:
+ case glslang::EOpSubgroupPartitionedExclusiveOr:
+ case glslang::EOpSubgroupPartitionedExclusiveXor:
+ builder.addExtension(spv::E_SPV_NV_shader_subgroup_partitioned);
+ builder.addCapability(spv::CapabilityGroupNonUniformPartitionedNV);
+ break;
+#endif
default: assert(0 && "Unhandled subgroup operation!");
}
@@ -5662,6 +5694,11 @@
case glslang::EOpSubgroupInclusiveAdd:
case glslang::EOpSubgroupExclusiveAdd:
case glslang::EOpSubgroupClusteredAdd:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedAdd:
+ case glslang::EOpSubgroupPartitionedInclusiveAdd:
+ case glslang::EOpSubgroupPartitionedExclusiveAdd:
+#endif
if (isFloat) {
opCode = spv::OpGroupNonUniformFAdd;
} else {
@@ -5672,6 +5709,11 @@
case glslang::EOpSubgroupInclusiveMul:
case glslang::EOpSubgroupExclusiveMul:
case glslang::EOpSubgroupClusteredMul:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedMul:
+ case glslang::EOpSubgroupPartitionedInclusiveMul:
+ case glslang::EOpSubgroupPartitionedExclusiveMul:
+#endif
if (isFloat) {
opCode = spv::OpGroupNonUniformFMul;
} else {
@@ -5682,6 +5724,11 @@
case glslang::EOpSubgroupInclusiveMin:
case glslang::EOpSubgroupExclusiveMin:
case glslang::EOpSubgroupClusteredMin:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedMin:
+ case glslang::EOpSubgroupPartitionedInclusiveMin:
+ case glslang::EOpSubgroupPartitionedExclusiveMin:
+#endif
if (isFloat) {
opCode = spv::OpGroupNonUniformFMin;
} else if (isUnsigned) {
@@ -5694,6 +5741,11 @@
case glslang::EOpSubgroupInclusiveMax:
case glslang::EOpSubgroupExclusiveMax:
case glslang::EOpSubgroupClusteredMax:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedMax:
+ case glslang::EOpSubgroupPartitionedInclusiveMax:
+ case glslang::EOpSubgroupPartitionedExclusiveMax:
+#endif
if (isFloat) {
opCode = spv::OpGroupNonUniformFMax;
} else if (isUnsigned) {
@@ -5706,6 +5758,11 @@
case glslang::EOpSubgroupInclusiveAnd:
case glslang::EOpSubgroupExclusiveAnd:
case glslang::EOpSubgroupClusteredAnd:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedAnd:
+ case glslang::EOpSubgroupPartitionedInclusiveAnd:
+ case glslang::EOpSubgroupPartitionedExclusiveAnd:
+#endif
if (isBool) {
opCode = spv::OpGroupNonUniformLogicalAnd;
} else {
@@ -5716,6 +5773,11 @@
case glslang::EOpSubgroupInclusiveOr:
case glslang::EOpSubgroupExclusiveOr:
case glslang::EOpSubgroupClusteredOr:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedOr:
+ case glslang::EOpSubgroupPartitionedInclusiveOr:
+ case glslang::EOpSubgroupPartitionedExclusiveOr:
+#endif
if (isBool) {
opCode = spv::OpGroupNonUniformLogicalOr;
} else {
@@ -5726,6 +5788,11 @@
case glslang::EOpSubgroupInclusiveXor:
case glslang::EOpSubgroupExclusiveXor:
case glslang::EOpSubgroupClusteredXor:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedXor:
+ case glslang::EOpSubgroupPartitionedInclusiveXor:
+ case glslang::EOpSubgroupPartitionedExclusiveXor:
+#endif
if (isBool) {
opCode = spv::OpGroupNonUniformLogicalXor;
} else {
@@ -5786,6 +5853,35 @@
case glslang::EOpSubgroupClusteredXor:
spvGroupOperands.push_back(spv::GroupOperationClusteredReduce);
break;
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedAdd:
+ case glslang::EOpSubgroupPartitionedMul:
+ case glslang::EOpSubgroupPartitionedMin:
+ case glslang::EOpSubgroupPartitionedMax:
+ case glslang::EOpSubgroupPartitionedAnd:
+ case glslang::EOpSubgroupPartitionedOr:
+ case glslang::EOpSubgroupPartitionedXor:
+ spvGroupOperands.push_back(spv::GroupOperationPartitionedReduceNV);
+ break;
+ case glslang::EOpSubgroupPartitionedInclusiveAdd:
+ case glslang::EOpSubgroupPartitionedInclusiveMul:
+ case glslang::EOpSubgroupPartitionedInclusiveMin:
+ case glslang::EOpSubgroupPartitionedInclusiveMax:
+ case glslang::EOpSubgroupPartitionedInclusiveAnd:
+ case glslang::EOpSubgroupPartitionedInclusiveOr:
+ case glslang::EOpSubgroupPartitionedInclusiveXor:
+ spvGroupOperands.push_back(spv::GroupOperationPartitionedInclusiveScanNV);
+ break;
+ case glslang::EOpSubgroupPartitionedExclusiveAdd:
+ case glslang::EOpSubgroupPartitionedExclusiveMul:
+ case glslang::EOpSubgroupPartitionedExclusiveMin:
+ case glslang::EOpSubgroupPartitionedExclusiveMax:
+ case glslang::EOpSubgroupPartitionedExclusiveAnd:
+ case glslang::EOpSubgroupPartitionedExclusiveOr:
+ case glslang::EOpSubgroupPartitionedExclusiveXor:
+ spvGroupOperands.push_back(spv::GroupOperationPartitionedExclusiveScanNV);
+ break;
+#endif
}
// Push back the operands next.
@@ -5974,6 +6070,29 @@
case glslang::EOpSubgroupClusteredOr:
case glslang::EOpSubgroupClusteredXor:
case glslang::EOpSubgroupQuadBroadcast:
+#ifdef NV_EXTENSIONS
+ case glslang::EOpSubgroupPartitionedAdd:
+ case glslang::EOpSubgroupPartitionedMul:
+ case glslang::EOpSubgroupPartitionedMin:
+ case glslang::EOpSubgroupPartitionedMax:
+ case glslang::EOpSubgroupPartitionedAnd:
+ case glslang::EOpSubgroupPartitionedOr:
+ case glslang::EOpSubgroupPartitionedXor:
+ case glslang::EOpSubgroupPartitionedInclusiveAdd:
+ case glslang::EOpSubgroupPartitionedInclusiveMul:
+ case glslang::EOpSubgroupPartitionedInclusiveMin:
+ case glslang::EOpSubgroupPartitionedInclusiveMax:
+ case glslang::EOpSubgroupPartitionedInclusiveAnd:
+ case glslang::EOpSubgroupPartitionedInclusiveOr:
+ case glslang::EOpSubgroupPartitionedInclusiveXor:
+ case glslang::EOpSubgroupPartitionedExclusiveAdd:
+ case glslang::EOpSubgroupPartitionedExclusiveMul:
+ case glslang::EOpSubgroupPartitionedExclusiveMin:
+ case glslang::EOpSubgroupPartitionedExclusiveMax:
+ case glslang::EOpSubgroupPartitionedExclusiveAnd:
+ case glslang::EOpSubgroupPartitionedExclusiveOr:
+ case glslang::EOpSubgroupPartitionedExclusiveXor:
+#endif
return createSubgroupOperation(op, typeId, operands, typeProxy);
#ifdef AMD_EXTENSIONS
diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp
old mode 100644
new mode 100755
index bfe9f7a..061c13e
--- a/SPIRV/doc.cpp
+++ b/SPIRV/doc.cpp
@@ -728,17 +728,22 @@
}
}
-const int GroupOperationCeiling = 4;
+const int GroupOperationCeiling = 9;
const char* GroupOperationString(int gop)
{
switch (gop)
{
- case 0: return "Reduce";
- case 1: return "InclusiveScan";
- case 2: return "ExclusiveScan";
- case 3: return "ClusteredReduce";
+ case GroupOperationReduce: return "Reduce";
+ case GroupOperationInclusiveScan: return "InclusiveScan";
+ case GroupOperationExclusiveScan: return "ExclusiveScan";
+ case GroupOperationClusteredReduce: return "ClusteredReduce";
+#ifdef NV_EXTENSIONS
+ case GroupOperationPartitionedReduceNV: return "PartitionedReduceNV";
+ case GroupOperationPartitionedInclusiveScanNV: return "PartitionedInclusiveScanNV";
+ case GroupOperationPartitionedExclusiveScanNV: return "PartitionedExclusiveScanNV";
+#endif
case GroupOperationCeiling:
default: return "Bad";
@@ -876,6 +881,7 @@
case 5255: return "ShaderViewportMaskNV";
case 5259: return "ShaderStereoViewNV";
case 5260: return "PerViewAttributesNV";
+ case 5297: return "GroupNonUniformPartitionedNV";
#endif
case 5265: return "FragmentFullyCoveredEXT";
@@ -1272,6 +1278,9 @@
case OpDecorateStringGOOGLE: return "OpDecorateStringGOOGLE";
case OpMemberDecorateStringGOOGLE: return "OpMemberDecorateStringGOOGLE";
+#ifdef NV_EXTENSIONS
+ case 5296: return "OpGroupNonUniformPartitionNV";
+#endif
case OpcodeCeiling:
default:
return "Bad";
@@ -3138,6 +3147,11 @@
InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Coordinate'");
InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Fragment Index'");
#endif
+
+#ifdef NV_EXTENSIONS
+ InstructionDesc[OpGroupNonUniformPartitionNV].capabilities.push_back(CapabilityGroupNonUniformPartitionedNV);
+ InstructionDesc[OpGroupNonUniformPartitionNV].operands.push(OperandId, "'X'");
+#endif
}
}; // end spv namespace
diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp
index 1fc24fb..3c599f1 100755
--- a/SPIRV/spirv.hpp
+++ b/SPIRV/spirv.hpp
@@ -572,6 +572,9 @@
GroupOperationInclusiveScan = 1,
GroupOperationExclusiveScan = 2,
GroupOperationClusteredReduce = 3,
+ GroupOperationPartitionedReduceNV = 6,
+ GroupOperationPartitionedInclusiveScanNV = 7,
+ GroupOperationPartitionedExclusiveScanNV = 8,
GroupOperationMax = 0x7fffffff,
};
@@ -688,6 +691,7 @@
CapabilityShaderStereoViewNV = 5259,
CapabilityPerViewAttributesNV = 5260,
CapabilityFragmentFullyCoveredEXT = 5265,
+ CapabilityGroupNonUniformPartitionedNV = 5297,
CapabilitySubgroupShuffleINTEL = 5568,
CapabilitySubgroupBufferBlockIOINTEL = 5569,
CapabilitySubgroupImageBlockIOINTEL = 5570,
@@ -1051,6 +1055,7 @@
OpGroupSMaxNonUniformAMD = 5007,
OpFragmentMaskFetchAMD = 5011,
OpFragmentFetchAMD = 5012,
+ OpGroupNonUniformPartitionNV = 5296,
OpSubgroupShuffleINTEL = 5571,
OpSubgroupShuffleDownINTEL = 5572,
OpSubgroupShuffleUpINTEL = 5573,