SPV: Implement the extension SPV_KHR_shader_ballot
diff --git a/SPIRV/CMakeLists.txt b/SPIRV/CMakeLists.txt
index 48a6c46..ad72276 100755
--- a/SPIRV/CMakeLists.txt
+++ b/SPIRV/CMakeLists.txt
@@ -13,6 +13,7 @@
 set(HEADERS
     spirv.hpp
     GLSL.std.450.h
+    GLSL.ext.KHR.h
     GlslangToSpv.h
     Logger.h
     SpvBuilder.h
diff --git a/SPIRV/GLSL.ext.KHR.h b/SPIRV/GLSL.ext.KHR.h
new file mode 100644
index 0000000..7ce795f
--- /dev/null
+++ b/SPIRV/GLSL.ext.KHR.h
@@ -0,0 +1,51 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+#ifndef GLSLextKHR_H
+#define GLSLextKHR_H
+
+enum BuiltIn;
+enum Op;
+enum Capability;
+
+static const int GLSLextKHRVersion = 100;
+static const int GLSLextKHRRevision = 1;
+
+// SPV_KHR_shader_ballot
+static const char* const E_SPV_KHR_shader_ballot        = "SPV_KHR_shader_ballot";
+
+static const BuiltIn BuiltInSubgroupEqMaskKHR           = static_cast<BuiltIn>(4416);
+static const BuiltIn BuiltInSubgroupGeMaskKHR           = static_cast<BuiltIn>(4417);
+static const BuiltIn BuiltInSubgroupGtMaskKHR           = static_cast<BuiltIn>(4418);
+static const BuiltIn BuiltInSubgroupLeMaskKHR           = static_cast<BuiltIn>(4419);
+static const BuiltIn BuiltInSubgroupLtMaskKHR           = static_cast<BuiltIn>(4420);
+
+static const Op OpSubgroupBallotKHR                     = static_cast<Op>(4421);
+static const Op OpSubgroupFirstInvocationKHR            = static_cast<Op>(4422);
+
+static const Capability CapabilitySubgroupBallotKHR     = static_cast<Capability>(4423);
+
+#endif  // #ifndef GLSLextKHR_H
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 7286588..8372dbb 100755
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -42,9 +42,10 @@
 #include "GlslangToSpv.h"
 #include "SpvBuilder.h"
 namespace spv {
-   #include "GLSL.std.450.h"
+    #include "GLSL.std.450.h"
+    #include "GLSL.ext.KHR.h"
 #ifdef AMD_EXTENSIONS
-   #include "GLSL.ext.AMD.h"
+    #include "GLSL.ext.AMD.h"
 #endif
 }
 
@@ -154,7 +155,7 @@
     spv::Id createConversion(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id destTypeId, spv::Id operand, glslang::TBasicType typeProxy);
     spv::Id makeSmearedConstant(spv::Id constant, int vectorSize);
     spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
-    spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy);
+    spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
 #ifdef AMD_EXTENSIONS
     spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::Id typeId, spv::Id operand);
 #endif
@@ -521,16 +522,44 @@
     case glslang::EbvLocalInvocationId:    return spv::BuiltInLocalInvocationId;
     case glslang::EbvLocalInvocationIndex: return spv::BuiltInLocalInvocationIndex;
     case glslang::EbvGlobalInvocationId:   return spv::BuiltInGlobalInvocationId;
+
     case glslang::EbvSubGroupSize:
+        // SubgroupBallotKHR is declared by SPV_KHR_shader_ballot, so the extension must be emitted with it.
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupSize;
+
     case glslang::EbvSubGroupInvocation:
+        // Same as above: the capability is only valid when its declaring extension is present.
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupLocalInvocationId;
+
     case glslang::EbvSubGroupEqMask:
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupEqMaskKHR;
+
     case glslang::EbvSubGroupGeMask:
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupGeMaskKHR;
+
     case glslang::EbvSubGroupGtMask:
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupGtMaskKHR;
+
     case glslang::EbvSubGroupLeMask:
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupLeMaskKHR;
+
     case glslang::EbvSubGroupLtMask:
-        // TODO: Add SPIR-V builtin ID.
-        logger->missingFunctionality("shader ballot");
-        return spv::BuiltInMax;
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+        return spv::BuiltInSubgroupLtMaskKHR;
+
 #ifdef AMD_EXTENSIONS
     case glslang::EbvBaryCoordNoPersp:          return spv::BuiltInBaryCoordNoPerspAMD;
     case glslang::EbvBaryCoordNoPerspCentroid:  return spv::BuiltInBaryCoordNoPerspCentroidAMD;
@@ -3610,10 +3635,6 @@
 
     case glslang::EOpBallot:
     case glslang::EOpReadFirstInvocation:
-        logger->missingFunctionality("shader ballot");
-        libCall = spv::GLSLstd450Bad;
-        break;
-
     case glslang::EOpAnyInvocation:
     case glslang::EOpAllInvocations:
     case glslang::EOpAllInvocationsEqual:
@@ -3625,7 +3646,11 @@
     case glslang::EOpMaxInvocationsNonUniform:
     case glslang::EOpAddInvocationsNonUniform:
 #endif
-        return createInvocationsOperation(op, typeId, operand, typeProxy);
+    {
+        std::vector<spv::Id> operands;
+        operands.push_back(operand);
+        return createInvocationsOperation(op, typeId, operands, typeProxy);
+    }
 
 #ifdef AMD_EXTENSIONS
     case glslang::EOpMbcnt:
@@ -3959,113 +3984,149 @@
 }
 
 // Create group invocation operations.
-spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy)
+spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy)
 {
     bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64;
     bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble;
 
-    builder.addCapability(spv::CapabilityGroups);
+    spv::Op opCode = spv::OpNop;
 
-    std::vector<spv::Id> operands;
-    operands.push_back(builder.makeUintConstant(spv::ScopeSubgroup));
+    std::vector<spv::Id> spvGroupOperands;
+    if (op == glslang::EOpBallot || op == glslang::EOpReadFirstInvocation) {
+        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
+        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
+    } else {
+        builder.addCapability(spv::CapabilityGroups);
+
+        spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup));
 #ifdef AMD_EXTENSIONS
-    if (op == glslang::EOpMinInvocations || op == glslang::EOpMaxInvocations || op == glslang::EOpAddInvocations ||
-        op == glslang::EOpMinInvocationsNonUniform || op == glslang::EOpMaxInvocationsNonUniform || op == glslang::EOpAddInvocationsNonUniform)
-        operands.push_back(spv::GroupOperationReduce);
+        if (op == glslang::EOpMinInvocations || op == glslang::EOpMaxInvocations || op == glslang::EOpAddInvocations ||
+            op == glslang::EOpMinInvocationsNonUniform || op == glslang::EOpMaxInvocationsNonUniform || op == glslang::EOpAddInvocationsNonUniform)
+            spvGroupOperands.push_back(spv::GroupOperationReduce);
 #endif
-    operands.push_back(operand);
+    }
+
+    for (auto opIt = operands.begin(); opIt != operands.end(); ++opIt)
+        spvGroupOperands.push_back(*opIt);
 
     switch (op) {
     case glslang::EOpAnyInvocation:
+        opCode = spv::OpGroupAny;
+        break;
     case glslang::EOpAllInvocations:
-        return builder.createOp(op == glslang::EOpAnyInvocation ? spv::OpGroupAny : spv::OpGroupAll, typeId, operands);
-
+        opCode = spv::OpGroupAll;
+        break;
     case glslang::EOpAllInvocationsEqual:
     {
-        spv::Id groupAll = builder.createOp(spv::OpGroupAll, typeId, operands);
-        spv::Id groupAny = builder.createOp(spv::OpGroupAny, typeId, operands);
+        spv::Id groupAll = builder.createOp(spv::OpGroupAll, typeId, spvGroupOperands);
+        spv::Id groupAny = builder.createOp(spv::OpGroupAny, typeId, spvGroupOperands);
 
         return builder.createBinOp(spv::OpLogicalOr, typeId, groupAll,
                                    builder.createUnaryOp(spv::OpLogicalNot, typeId, groupAny));
     }
+
+    case glslang::EOpReadInvocation:
+        opCode = spv::OpGroupBroadcast;
+        break;
+    case glslang::EOpReadFirstInvocation:
+        opCode = spv::OpSubgroupFirstInvocationKHR;
+        break;
+    case glslang::EOpBallot:
+    {
+        // NOTE: According to the spec, the result type of "OpSubgroupBallotKHR" must be a 4 component vector of 32
+        // bit integer types. The GLSL built-in function "ballotARB()" assumes the maximum number of invocations in
+        // a subgroup is 64. Thus, we have to convert uvec4.xy to uint64_t as follows:
+        //
+        //     result = Bitcast(SubgroupBallotKHR(Predicate).xy)
+        //
+        spv::Id uintType  = builder.makeUintType(32);
+        spv::Id uvec4Type = builder.makeVectorType(uintType, 4);
+        spv::Id result = builder.createOp(spv::OpSubgroupBallotKHR, uvec4Type, spvGroupOperands);
+
+        std::vector<spv::Id> components;
+        components.push_back(builder.createCompositeExtract(result, uintType, 0));
+        components.push_back(builder.createCompositeExtract(result, uintType, 1));
+
+        spv::Id uvec2Type = builder.makeVectorType(uintType, 2);
+        return builder.createUnaryOp(spv::OpBitcast, typeId,
+                                     builder.createCompositeConstruct(uvec2Type, components));
+    }
+
 #ifdef AMD_EXTENSIONS
     case glslang::EOpMinInvocations:
     case glslang::EOpMaxInvocations:
     case glslang::EOpAddInvocations:
-    {
-        spv::Op spvOp = spv::OpNop;
         if (op == glslang::EOpMinInvocations) {
             if (isFloat)
-                spvOp = spv::OpGroupFMin;
+                opCode = spv::OpGroupFMin;
             else {
                 if (isUnsigned)
-                    spvOp = spv::OpGroupUMin;
+                    opCode = spv::OpGroupUMin;
                 else
-                    spvOp = spv::OpGroupSMin;
+                    opCode = spv::OpGroupSMin;
             }
         } else if (op == glslang::EOpMaxInvocations) {
             if (isFloat)
-                spvOp = spv::OpGroupFMax;
+                opCode = spv::OpGroupFMax;
             else {
                 if (isUnsigned)
-                    spvOp = spv::OpGroupUMax;
+                    opCode = spv::OpGroupUMax;
                 else
-                    spvOp = spv::OpGroupSMax;
+                    opCode = spv::OpGroupSMax;
             }
         } else {
             if (isFloat)
-                spvOp = spv::OpGroupFAdd;
+                opCode = spv::OpGroupFAdd;
             else
-                spvOp = spv::OpGroupIAdd;
+                opCode = spv::OpGroupIAdd;
         }
 
         if (builder.isVectorType(typeId))
-            return CreateInvocationsVectorOperation(spvOp, typeId, operand);
-        else
-            return builder.createOp(spvOp, typeId, operands);
-    }
+            return CreateInvocationsVectorOperation(opCode, typeId, operands[0]);
+
+        break;
     case glslang::EOpMinInvocationsNonUniform:
     case glslang::EOpMaxInvocationsNonUniform:
     case glslang::EOpAddInvocationsNonUniform:
-    {
-        spv::Op spvOp = spv::OpNop;
         if (op == glslang::EOpMinInvocationsNonUniform) {
             if (isFloat)
-                spvOp = spv::OpGroupFMinNonUniformAMD;
+                opCode = spv::OpGroupFMinNonUniformAMD;
             else {
                 if (isUnsigned)
-                    spvOp = spv::OpGroupUMinNonUniformAMD;
+                    opCode = spv::OpGroupUMinNonUniformAMD;
                 else
-                    spvOp = spv::OpGroupSMinNonUniformAMD;
+                    opCode = spv::OpGroupSMinNonUniformAMD;
             }
         }
         else if (op == glslang::EOpMaxInvocationsNonUniform) {
             if (isFloat)
-                spvOp = spv::OpGroupFMaxNonUniformAMD;
+                opCode = spv::OpGroupFMaxNonUniformAMD;
             else {
                 if (isUnsigned)
-                    spvOp = spv::OpGroupUMaxNonUniformAMD;
+                    opCode = spv::OpGroupUMaxNonUniformAMD;
                 else
-                    spvOp = spv::OpGroupSMaxNonUniformAMD;
+                    opCode = spv::OpGroupSMaxNonUniformAMD;
             }
         }
         else {
             if (isFloat)
-                spvOp = spv::OpGroupFAddNonUniformAMD;
+                opCode = spv::OpGroupFAddNonUniformAMD;
             else
-                spvOp = spv::OpGroupIAddNonUniformAMD;
+                opCode = spv::OpGroupIAddNonUniformAMD;
         }
 
         if (builder.isVectorType(typeId))
-            return CreateInvocationsVectorOperation(spvOp, typeId, operand);
-        else
-            return builder.createOp(spvOp, typeId, operands);
-    }
+            return CreateInvocationsVectorOperation(opCode, typeId, operands[0]);
+
+        break;
 #endif
     default:
         logger->missingFunctionality("invocation operation");
         return spv::NoResult;
     }
+
+    assert(opCode != spv::OpNop);
+    return builder.createOp(opCode, typeId, spvGroupOperands);
 }
 
 #ifdef AMD_EXTENSIONS
@@ -4256,9 +4317,7 @@
         break;
 
     case glslang::EOpReadInvocation:
-        logger->missingFunctionality("shader ballot");
-        libCall = spv::GLSLstd450Bad;
-        break;
+        return createInvocationsOperation(op, typeId, operands, typeProxy);
 
 #ifdef AMD_EXTENSIONS
     case glslang::EOpSwizzleInvocations:
@@ -4825,7 +4884,7 @@
     if (extBuiltinMap.find(name) != extBuiltinMap.end())
         return extBuiltinMap[name];
     else {
-        builder.addExtensions(name);
+        builder.addExtension(name);
         spv::Id extBuiltins = builder.import(name);
         extBuiltinMap[name] = extBuiltins;
         return extBuiltins;
diff --git a/SPIRV/SpvBuilder.cpp b/SPIRV/SpvBuilder.cpp
index a881d1b..7aaa51f 100644
--- a/SPIRV/SpvBuilder.cpp
+++ b/SPIRV/SpvBuilder.cpp
@@ -2318,9 +2318,9 @@
         capInst.dump(out);
     }
 
-    for (int e = 0; e < (int)extensions.size(); ++e) {
+    for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) {
         Instruction extInst(0, 0, OpExtension);
-        extInst.addStringOperand(extensions[e]);
+        extInst.addStringOperand(it->c_str());
         extInst.dump(out);
     }
 
diff --git a/SPIRV/SpvBuilder.h b/SPIRV/SpvBuilder.h
index 38dc1fa..6e709ea 100755
--- a/SPIRV/SpvBuilder.h
+++ b/SPIRV/SpvBuilder.h
@@ -71,7 +71,7 @@
         sourceVersion = version;
     }
     void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); }
-    void addExtensions(const char* ext) { extensions.push_back(ext); }
+    void addExtension(const char* ext) { extensions.insert(ext); }  // ext must be a non-null C string
     Id import(const char*);
     void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem)
     {
@@ -552,7 +552,7 @@
 
     SourceLanguage source;
     int sourceVersion;
-    std::vector<const char*> extensions;
+    std::set<std::string> extensions;  // keyed by name so duplicates collapse and emission order is deterministic
     std::vector<const char*> sourceExtensions;
     AddressingModel addressModel;
     MemoryModel memoryModel;
     std::vector<const char*> sourceExtensions;
     AddressingModel addressModel;
     MemoryModel memoryModel;
diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp
index a25f7c0..d2161dd 100755
--- a/SPIRV/doc.cpp
+++ b/SPIRV/doc.cpp
@@ -45,14 +45,15 @@
 #include <cstring>
 #include <algorithm>
 
-#ifdef AMD_EXTENSIONS
 namespace spv {
     extern "C" {
         // Include C-based headers that don't have a namespace
+        #include "GLSL.ext.KHR.h"
+#ifdef AMD_EXTENSIONS
         #include "GLSL.ext.AMD.h"
+#endif
     }
 }
-#endif
 
 namespace spv {
 
@@ -312,6 +313,12 @@
     case BuiltInCeiling:
     default: return "Bad";
 
+    case 4416: return "SubgroupEqMaskKHR";
+    case 4417: return "SubgroupGeMaskKHR";
+    case 4418: return "SubgroupGtMaskKHR";
+    case 4419: return "SubgroupLeMaskKHR";
+    case 4420: return "SubgroupLtMaskKHR";
+
 #ifdef AMD_EXTENSIONS
     case 4992: return "BaryCoordNoPerspAMD";
     case 4993: return "BaryCoordNoPerspCentroidAMD";
@@ -799,6 +806,8 @@
 
     case CapabilityCeiling:
     default: return "Bad";
+
+    case 4423: return "SubgroupBallotKHR";
     }
 }
 
@@ -1131,6 +1140,9 @@
     default:
         return "Bad";
 
+    case 4421: return "OpSubgroupBallotKHR";
+    case 4422: return "OpSubgroupFirstInvocationKHR";
+
 #ifdef AMD_EXTENSIONS
     case 5000: return "OpGroupIAddNonUniformAMD";
     case 5001: return "OpGroupFAddNonUniformAMD";
@@ -1146,11 +1158,7 @@
 
 // The set of objects that hold all the instruction/operand
 // parameterization information.
-#ifdef AMD_EXTENSIONS
 InstructionParameters InstructionDesc[OpCodeMask + 1];
-#else
-InstructionParameters InstructionDesc[OpcodeCeiling];
-#endif
 OperandParameters ExecutionModeOperands[ExecutionModeCeiling];
 OperandParameters DecorationOperands[DecorationCeiling];
 
@@ -2742,6 +2750,10 @@
     InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Wait Events'");
     InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Ret Event'");
 
+    InstructionDesc[OpSubgroupBallotKHR].operands.push(OperandId, "'Predicate'");
+
+    InstructionDesc[OpSubgroupFirstInvocationKHR].operands.push(OperandId, "'Value'");
+
 #ifdef AMD_EXTENSIONS
     InstructionDesc[OpGroupIAddNonUniformAMD].capabilities.push_back(CapabilityGroups);
     InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandScope, "'Execution'");
diff --git a/Test/baseResults/spv.shaderBallot.comp.out b/Test/baseResults/spv.shaderBallot.comp.out
index cb2e013..c60db16 100644
--- a/Test/baseResults/spv.shaderBallot.comp.out
+++ b/Test/baseResults/spv.shaderBallot.comp.out
@@ -5,16 +5,18 @@
 Linked compute stage:
 
 
-Missing functionality: shader ballot
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 241
+// Id's are bound by 245
 
                               Capability Shader
                               Capability Int64
+                              Capability Groups
+                              Capability SubgroupBallotKHR
+                              Extension  "SPV_KHR_shader_ballot"
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint GLCompute 4  "main" 10 22 24 27 30 33
+                              EntryPoint GLCompute 4  "main" 10 12 21 23 26 29 32
                               ExecutionMode 4 LocalSize 8 8 1
                               Source GLSL 450
                               SourceExtension  "GL_ARB_gpu_shader_int64"
@@ -22,293 +24,304 @@
                               Name 4  "main"
                               Name 8  "invocation"
                               Name 10  "gl_SubGroupInvocationARB"
-                              Name 13  "gl_SubGroupSizeARB"
-                              Name 20  "relMask"
-                              Name 22  "gl_SubGroupEqMaskARB"
-                              Name 24  "gl_SubGroupGeMaskARB"
-                              Name 27  "gl_SubGroupGtMaskARB"
-                              Name 30  "gl_SubGroupLeMaskARB"
-                              Name 33  "gl_SubGroupLtMaskARB"
-                              Name 48  "Buffers"
-                              MemberName 48(Buffers) 0  "f4"
-                              MemberName 48(Buffers) 1  "i4"
-                              MemberName 48(Buffers) 2  "u4"
-                              Name 51  "data"
-                              MemberDecorate 48(Buffers) 0 Offset 0
-                              MemberDecorate 48(Buffers) 1 Offset 16
-                              MemberDecorate 48(Buffers) 2 Offset 32
-                              Decorate 48(Buffers) BufferBlock
-                              Decorate 51(data) DescriptorSet 0
-                              Decorate 51(data) Binding 0
-                              Decorate 240 BuiltIn WorkgroupSize
+                              Name 12  "gl_SubGroupSizeARB"
+                              Name 19  "relMask"
+                              Name 21  "gl_SubGroupEqMaskARB"
+                              Name 23  "gl_SubGroupGeMaskARB"
+                              Name 26  "gl_SubGroupGtMaskARB"
+                              Name 29  "gl_SubGroupLeMaskARB"
+                              Name 32  "gl_SubGroupLtMaskARB"
+                              Name 52  "Buffers"
+                              MemberName 52(Buffers) 0  "f4"
+                              MemberName 52(Buffers) 1  "i4"
+                              MemberName 52(Buffers) 2  "u4"
+                              Name 55  "data"
+                              Decorate 10(gl_SubGroupInvocationARB) BuiltIn SubgroupLocalInvocationId
+                              Decorate 12(gl_SubGroupSizeARB) BuiltIn SubgroupSize
+                              Decorate 21(gl_SubGroupEqMaskARB) BuiltIn SubgroupEqMaskKHR
+                              Decorate 23(gl_SubGroupGeMaskARB) BuiltIn SubgroupGeMaskKHR
+                              Decorate 26(gl_SubGroupGtMaskARB) BuiltIn SubgroupGtMaskKHR
+                              Decorate 29(gl_SubGroupLeMaskARB) BuiltIn SubgroupLeMaskKHR
+                              Decorate 32(gl_SubGroupLtMaskARB) BuiltIn SubgroupLtMaskKHR
+                              MemberDecorate 52(Buffers) 0 Offset 0
+                              MemberDecorate 52(Buffers) 1 Offset 16
+                              MemberDecorate 52(Buffers) 2 Offset 32
+                              Decorate 52(Buffers) BufferBlock
+                              Decorate 55(data) DescriptorSet 0
+                              Decorate 55(data) Binding 0
+                              Decorate 244 BuiltIn WorkgroupSize
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 0
                7:             TypePointer Function 6(int)
                9:             TypePointer Input 6(int)
 10(gl_SubGroupInvocationARB):      9(ptr) Variable Input
-              12:             TypePointer UniformConstant 6(int)
-13(gl_SubGroupSizeARB):     12(ptr) Variable UniformConstant
-              16:      6(int) Constant 4
-              18:             TypeInt 64 0
-              19:             TypePointer Function 18(int)
-              21:             TypePointer Input 18(int)
-22(gl_SubGroupEqMaskARB):     21(ptr) Variable Input
-24(gl_SubGroupGeMaskARB):     21(ptr) Variable Input
-27(gl_SubGroupGtMaskARB):     21(ptr) Variable Input
-30(gl_SubGroupLeMaskARB):     21(ptr) Variable Input
-33(gl_SubGroupLtMaskARB):     21(ptr) Variable Input
-              37:             TypeBool
-              38:    37(bool) ConstantTrue
-              43:             TypeFloat 32
-              44:             TypeVector 43(float) 4
-              45:             TypeInt 32 1
-              46:             TypeVector 45(int) 4
-              47:             TypeVector 6(int) 4
-     48(Buffers):             TypeStruct 44(fvec4) 46(ivec4) 47(ivec4)
-              49:             TypeArray 48(Buffers) 16
-              50:             TypePointer Uniform 49
-        51(data):     50(ptr) Variable Uniform
-              53:     45(int) Constant 0
-              54:      6(int) Constant 0
-              55:             TypePointer Uniform 43(float)
-              62:     45(int) Constant 1
-              63:             TypeVector 43(float) 2
-              64:             TypePointer Uniform 44(fvec4)
-              74:     45(int) Constant 2
-              75:             TypeVector 43(float) 3
-              85:     45(int) Constant 3
-              92:             TypePointer Uniform 45(int)
-              99:             TypeVector 45(int) 2
-             100:             TypePointer Uniform 46(ivec4)
-             110:             TypeVector 45(int) 3
-             126:             TypePointer Uniform 6(int)
-             133:             TypeVector 6(int) 2
-             134:             TypePointer Uniform 47(ivec4)
-             144:             TypeVector 6(int) 3
-             238:      6(int) Constant 8
-             239:      6(int) Constant 1
-             240:  144(ivec3) ConstantComposite 238 238 239
+12(gl_SubGroupSizeARB):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 64 0
+              18:             TypePointer Function 17(int)
+              20:             TypePointer Input 17(int)
+21(gl_SubGroupEqMaskARB):     20(ptr) Variable Input
+23(gl_SubGroupGeMaskARB):     20(ptr) Variable Input
+26(gl_SubGroupGtMaskARB):     20(ptr) Variable Input
+29(gl_SubGroupLeMaskARB):     20(ptr) Variable Input
+32(gl_SubGroupLtMaskARB):     20(ptr) Variable Input
+              36:             TypeBool
+              37:    36(bool) ConstantTrue
+              38:             TypeVector 6(int) 4
+              42:             TypeVector 6(int) 2
+              48:             TypeFloat 32
+              49:             TypeVector 48(float) 4
+              50:             TypeInt 32 1
+              51:             TypeVector 50(int) 4
+     52(Buffers):             TypeStruct 49(fvec4) 51(ivec4) 38(ivec4)
+              53:             TypeArray 52(Buffers) 15
+              54:             TypePointer Uniform 53
+        55(data):     54(ptr) Variable Uniform
+              57:     50(int) Constant 0
+              58:      6(int) Constant 0
+              59:             TypePointer Uniform 48(float)
+              63:      6(int) Constant 3
+              67:     50(int) Constant 1
+              68:             TypeVector 48(float) 2
+              69:             TypePointer Uniform 49(fvec4)
+              79:     50(int) Constant 2
+              80:             TypeVector 48(float) 3
+              90:     50(int) Constant 3
+              97:             TypePointer Uniform 50(int)
+             104:             TypeVector 50(int) 2
+             105:             TypePointer Uniform 51(ivec4)
+             115:             TypeVector 50(int) 3
+             131:             TypePointer Uniform 6(int)
+             138:             TypePointer Uniform 38(ivec4)
+             148:             TypeVector 6(int) 3
+             242:      6(int) Constant 8
+             243:      6(int) Constant 1
+             244:  148(ivec3) ConstantComposite 242 242 243
          4(main):           2 Function None 3
                5:             Label
    8(invocation):      7(ptr) Variable Function
-     20(relMask):     19(ptr) Variable Function
+     19(relMask):     18(ptr) Variable Function
               11:      6(int) Load 10(gl_SubGroupInvocationARB)
-              14:      6(int) Load 13(gl_SubGroupSizeARB)
-              15:      6(int) IAdd 11 14
-              17:      6(int) UMod 15 16
-                              Store 8(invocation) 17
-              23:     18(int) Load 22(gl_SubGroupEqMaskARB)
-              25:     18(int) Load 24(gl_SubGroupGeMaskARB)
-              26:     18(int) IAdd 23 25
-              28:     18(int) Load 27(gl_SubGroupGtMaskARB)
-              29:     18(int) IAdd 26 28
-              31:     18(int) Load 30(gl_SubGroupLeMaskARB)
-              32:     18(int) IAdd 29 31
-              34:     18(int) Load 33(gl_SubGroupLtMaskARB)
-              35:     18(int) IAdd 32 34
-                              Store 20(relMask) 35
-              36:     18(int) Load 20(relMask)
-              39:     18(int) ExtInst 1(GLSL.std.450) 0(Unknown) 38
-              40:    37(bool) IEqual 36 39
-                              SelectionMerge 42 None
-                              BranchConditional 40 41 159
-              41:               Label
-              52:      6(int)   Load 8(invocation)
-              56:     55(ptr)   AccessChain 51(data) 53 53 54
-              57:   43(float)   Load 56
-              58:      6(int)   Load 8(invocation)
-              59:   43(float)   ExtInst 1(GLSL.std.450) 0(Unknown) 57 58
-              60:     55(ptr)   AccessChain 51(data) 52 53 54
-                                Store 60 59
-              61:      6(int)   Load 8(invocation)
-              65:     64(ptr)   AccessChain 51(data) 62 53
-              66:   44(fvec4)   Load 65
-              67:   63(fvec2)   VectorShuffle 66 66 0 1
-              68:      6(int)   Load 8(invocation)
-              69:   63(fvec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 67 68
-              70:     64(ptr)   AccessChain 51(data) 61 53
-              71:   44(fvec4)   Load 70
-              72:   44(fvec4)   VectorShuffle 71 69 4 5 2 3
-                                Store 70 72
+              13:      6(int) Load 12(gl_SubGroupSizeARB)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              22:     17(int) Load 21(gl_SubGroupEqMaskARB)
+              24:     17(int) Load 23(gl_SubGroupGeMaskARB)
+              25:     17(int) IAdd 22 24
+              27:     17(int) Load 26(gl_SubGroupGtMaskARB)
+              28:     17(int) IAdd 25 27
+              30:     17(int) Load 29(gl_SubGroupLeMaskARB)
+              31:     17(int) IAdd 28 30
+              33:     17(int) Load 32(gl_SubGroupLtMaskARB)
+              34:     17(int) IAdd 31 33
+                              Store 19(relMask) 34
+              35:     17(int) Load 19(relMask)
+              39:   38(ivec4) SubgroupBallotKHR 37
+              40:      6(int) CompositeExtract 39 0
+              41:      6(int) CompositeExtract 39 1
+              43:   42(ivec2) CompositeConstruct 40 41
+              44:     17(int) Bitcast 43
+              45:    36(bool) IEqual 35 44
+                              SelectionMerge 47 None
+                              BranchConditional 45 46 163
+              46:               Label
+              56:      6(int)   Load 8(invocation)
+              60:     59(ptr)   AccessChain 55(data) 57 57 58
+              61:   48(float)   Load 60
+              62:      6(int)   Load 8(invocation)
+              64:   48(float)   GroupBroadcast 63 61 62
+              65:     59(ptr)   AccessChain 55(data) 56 57 58
+                                Store 65 64
+              66:      6(int)   Load 8(invocation)
+              70:     69(ptr)   AccessChain 55(data) 67 57
+              71:   49(fvec4)   Load 70
+              72:   68(fvec2)   VectorShuffle 71 71 0 1
               73:      6(int)   Load 8(invocation)
-              76:     64(ptr)   AccessChain 51(data) 74 53
-              77:   44(fvec4)   Load 76
-              78:   75(fvec3)   VectorShuffle 77 77 0 1 2
-              79:      6(int)   Load 8(invocation)
-              80:   75(fvec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 78 79
-              81:     64(ptr)   AccessChain 51(data) 73 53
-              82:   44(fvec4)   Load 81
-              83:   44(fvec4)   VectorShuffle 82 80 4 5 6 3
-                                Store 81 83
+              74:   68(fvec2)   GroupBroadcast 63 72 73
+              75:     69(ptr)   AccessChain 55(data) 66 57
+              76:   49(fvec4)   Load 75
+              77:   49(fvec4)   VectorShuffle 76 74 4 5 2 3
+                                Store 75 77
+              78:      6(int)   Load 8(invocation)
+              81:     69(ptr)   AccessChain 55(data) 79 57
+              82:   49(fvec4)   Load 81
+              83:   80(fvec3)   VectorShuffle 82 82 0 1 2
               84:      6(int)   Load 8(invocation)
-              86:     64(ptr)   AccessChain 51(data) 85 53
-              87:   44(fvec4)   Load 86
-              88:      6(int)   Load 8(invocation)
-              89:   44(fvec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 87 88
-              90:     64(ptr)   AccessChain 51(data) 84 53
-                                Store 90 89
-              91:      6(int)   Load 8(invocation)
-              93:     92(ptr)   AccessChain 51(data) 53 62 54
-              94:     45(int)   Load 93
-              95:      6(int)   Load 8(invocation)
-              96:     45(int)   ExtInst 1(GLSL.std.450) 0(Unknown) 94 95
-              97:     92(ptr)   AccessChain 51(data) 91 62 54
-                                Store 97 96
-              98:      6(int)   Load 8(invocation)
-             101:    100(ptr)   AccessChain 51(data) 62 62
-             102:   46(ivec4)   Load 101
-             103:   99(ivec2)   VectorShuffle 102 102 0 1
-             104:      6(int)   Load 8(invocation)
-             105:   99(ivec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 103 104
-             106:    100(ptr)   AccessChain 51(data) 98 62
-             107:   46(ivec4)   Load 106
-             108:   46(ivec4)   VectorShuffle 107 105 4 5 2 3
-                                Store 106 108
+              85:   80(fvec3)   GroupBroadcast 63 83 84
+              86:     69(ptr)   AccessChain 55(data) 78 57
+              87:   49(fvec4)   Load 86
+              88:   49(fvec4)   VectorShuffle 87 85 4 5 6 3
+                                Store 86 88
+              89:      6(int)   Load 8(invocation)
+              91:     69(ptr)   AccessChain 55(data) 90 57
+              92:   49(fvec4)   Load 91
+              93:      6(int)   Load 8(invocation)
+              94:   49(fvec4)   GroupBroadcast 63 92 93
+              95:     69(ptr)   AccessChain 55(data) 89 57
+                                Store 95 94
+              96:      6(int)   Load 8(invocation)
+              98:     97(ptr)   AccessChain 55(data) 57 67 58
+              99:     50(int)   Load 98
+             100:      6(int)   Load 8(invocation)
+             101:     50(int)   GroupBroadcast 63 99 100
+             102:     97(ptr)   AccessChain 55(data) 96 67 58
+                                Store 102 101
+             103:      6(int)   Load 8(invocation)
+             106:    105(ptr)   AccessChain 55(data) 67 67
+             107:   51(ivec4)   Load 106
+             108:  104(ivec2)   VectorShuffle 107 107 0 1
              109:      6(int)   Load 8(invocation)
-             111:    100(ptr)   AccessChain 51(data) 74 62
-             112:   46(ivec4)   Load 111
-             113:  110(ivec3)   VectorShuffle 112 112 0 1 2
+             110:  104(ivec2)   GroupBroadcast 63 108 109
+             111:    105(ptr)   AccessChain 55(data) 103 67
+             112:   51(ivec4)   Load 111
+             113:   51(ivec4)   VectorShuffle 112 110 4 5 2 3
+                                Store 111 113
              114:      6(int)   Load 8(invocation)
-             115:  110(ivec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 113 114
-             116:    100(ptr)   AccessChain 51(data) 109 62
-             117:   46(ivec4)   Load 116
-             118:   46(ivec4)   VectorShuffle 117 115 4 5 6 3
-                                Store 116 118
+             116:    105(ptr)   AccessChain 55(data) 79 67
+             117:   51(ivec4)   Load 116
+             118:  115(ivec3)   VectorShuffle 117 117 0 1 2
              119:      6(int)   Load 8(invocation)
-             120:    100(ptr)   AccessChain 51(data) 85 62
-             121:   46(ivec4)   Load 120
-             122:      6(int)   Load 8(invocation)
-             123:   46(ivec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 121 122
-             124:    100(ptr)   AccessChain 51(data) 119 62
-                                Store 124 123
-             125:      6(int)   Load 8(invocation)
-             127:    126(ptr)   AccessChain 51(data) 53 74 54
-             128:      6(int)   Load 127
-             129:      6(int)   Load 8(invocation)
-             130:      6(int)   ExtInst 1(GLSL.std.450) 0(Unknown) 128 129
-             131:    126(ptr)   AccessChain 51(data) 125 74 54
-                                Store 131 130
-             132:      6(int)   Load 8(invocation)
-             135:    134(ptr)   AccessChain 51(data) 62 74
-             136:   47(ivec4)   Load 135
-             137:  133(ivec2)   VectorShuffle 136 136 0 1
-             138:      6(int)   Load 8(invocation)
-             139:  133(ivec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 137 138
-             140:    134(ptr)   AccessChain 51(data) 132 74
-             141:   47(ivec4)   Load 140
-             142:   47(ivec4)   VectorShuffle 141 139 4 5 2 3
-                                Store 140 142
-             143:      6(int)   Load 8(invocation)
-             145:    134(ptr)   AccessChain 51(data) 74 74
-             146:   47(ivec4)   Load 145
-             147:  144(ivec3)   VectorShuffle 146 146 0 1 2
-             148:      6(int)   Load 8(invocation)
-             149:  144(ivec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 147 148
-             150:    134(ptr)   AccessChain 51(data) 143 74
-             151:   47(ivec4)   Load 150
-             152:   47(ivec4)   VectorShuffle 151 149 4 5 6 3
-                                Store 150 152
-             153:      6(int)   Load 8(invocation)
-             154:    134(ptr)   AccessChain 51(data) 85 74
-             155:   47(ivec4)   Load 154
-             156:      6(int)   Load 8(invocation)
-             157:   47(ivec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 155 156
-             158:    134(ptr)   AccessChain 51(data) 153 74
-                                Store 158 157
-                                Branch 42
-             159:               Label
+             120:  115(ivec3)   GroupBroadcast 63 118 119
+             121:    105(ptr)   AccessChain 55(data) 114 67
+             122:   51(ivec4)   Load 121
+             123:   51(ivec4)   VectorShuffle 122 120 4 5 6 3
+                                Store 121 123
+             124:      6(int)   Load 8(invocation)
+             125:    105(ptr)   AccessChain 55(data) 90 67
+             126:   51(ivec4)   Load 125
+             127:      6(int)   Load 8(invocation)
+             128:   51(ivec4)   GroupBroadcast 63 126 127
+             129:    105(ptr)   AccessChain 55(data) 124 67
+                                Store 129 128
+             130:      6(int)   Load 8(invocation)
+             132:    131(ptr)   AccessChain 55(data) 57 79 58
+             133:      6(int)   Load 132
+             134:      6(int)   Load 8(invocation)
+             135:      6(int)   GroupBroadcast 63 133 134
+             136:    131(ptr)   AccessChain 55(data) 130 79 58
+                                Store 136 135
+             137:      6(int)   Load 8(invocation)
+             139:    138(ptr)   AccessChain 55(data) 67 79
+             140:   38(ivec4)   Load 139
+             141:   42(ivec2)   VectorShuffle 140 140 0 1
+             142:      6(int)   Load 8(invocation)
+             143:   42(ivec2)   GroupBroadcast 63 141 142
+             144:    138(ptr)   AccessChain 55(data) 137 79
+             145:   38(ivec4)   Load 144
+             146:   38(ivec4)   VectorShuffle 145 143 4 5 2 3
+                                Store 144 146
+             147:      6(int)   Load 8(invocation)
+             149:    138(ptr)   AccessChain 55(data) 79 79
+             150:   38(ivec4)   Load 149
+             151:  148(ivec3)   VectorShuffle 150 150 0 1 2
+             152:      6(int)   Load 8(invocation)
+             153:  148(ivec3)   GroupBroadcast 63 151 152
+             154:    138(ptr)   AccessChain 55(data) 147 79
+             155:   38(ivec4)   Load 154
+             156:   38(ivec4)   VectorShuffle 155 153 4 5 6 3
+                                Store 154 156
+             157:      6(int)   Load 8(invocation)
+             158:    138(ptr)   AccessChain 55(data) 90 79
+             159:   38(ivec4)   Load 158
              160:      6(int)   Load 8(invocation)
-             161:     55(ptr)   AccessChain 51(data) 53 53 54
-             162:   43(float)   Load 161
-             163:   43(float)   ExtInst 1(GLSL.std.450) 0(Unknown) 162
-             164:     55(ptr)   AccessChain 51(data) 160 53 54
-                                Store 164 163
-             165:      6(int)   Load 8(invocation)
-             166:     64(ptr)   AccessChain 51(data) 62 53
-             167:   44(fvec4)   Load 166
-             168:   63(fvec2)   VectorShuffle 167 167 0 1
-             169:   63(fvec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 168
-             170:     64(ptr)   AccessChain 51(data) 165 53
-             171:   44(fvec4)   Load 170
-             172:   44(fvec4)   VectorShuffle 171 169 4 5 2 3
-                                Store 170 172
-             173:      6(int)   Load 8(invocation)
-             174:     64(ptr)   AccessChain 51(data) 74 53
-             175:   44(fvec4)   Load 174
-             176:   75(fvec3)   VectorShuffle 175 175 0 1 2
-             177:   75(fvec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 176
-             178:     64(ptr)   AccessChain 51(data) 173 53
-             179:   44(fvec4)   Load 178
-             180:   44(fvec4)   VectorShuffle 179 177 4 5 6 3
-                                Store 178 180
-             181:      6(int)   Load 8(invocation)
-             182:     64(ptr)   AccessChain 51(data) 85 53
-             183:   44(fvec4)   Load 182
-             184:   44(fvec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 183
-             185:     64(ptr)   AccessChain 51(data) 181 53
-                                Store 185 184
-             186:      6(int)   Load 8(invocation)
-             187:     92(ptr)   AccessChain 51(data) 53 62 54
-             188:     45(int)   Load 187
-             189:     45(int)   ExtInst 1(GLSL.std.450) 0(Unknown) 188
-             190:     92(ptr)   AccessChain 51(data) 186 62 54
-                                Store 190 189
-             191:      6(int)   Load 8(invocation)
-             192:    100(ptr)   AccessChain 51(data) 62 62
-             193:   46(ivec4)   Load 192
-             194:   99(ivec2)   VectorShuffle 193 193 0 1
-             195:   99(ivec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 194
-             196:    100(ptr)   AccessChain 51(data) 191 62
-             197:   46(ivec4)   Load 196
-             198:   46(ivec4)   VectorShuffle 197 195 4 5 2 3
-                                Store 196 198
-             199:      6(int)   Load 8(invocation)
-             200:    100(ptr)   AccessChain 51(data) 74 62
-             201:   46(ivec4)   Load 200
-             202:  110(ivec3)   VectorShuffle 201 201 0 1 2
-             203:  110(ivec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 202
-             204:    100(ptr)   AccessChain 51(data) 199 62
-             205:   46(ivec4)   Load 204
-             206:   46(ivec4)   VectorShuffle 205 203 4 5 6 3
-                                Store 204 206
-             207:      6(int)   Load 8(invocation)
-             208:    100(ptr)   AccessChain 51(data) 85 62
-             209:   46(ivec4)   Load 208
-             210:   46(ivec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 209
-             211:    100(ptr)   AccessChain 51(data) 207 62
-                                Store 211 210
-             212:      6(int)   Load 8(invocation)
-             213:    126(ptr)   AccessChain 51(data) 53 74 54
-             214:      6(int)   Load 213
-             215:      6(int)   ExtInst 1(GLSL.std.450) 0(Unknown) 214
-             216:    126(ptr)   AccessChain 51(data) 212 74 54
-                                Store 216 215
-             217:      6(int)   Load 8(invocation)
-             218:    134(ptr)   AccessChain 51(data) 62 74
-             219:   47(ivec4)   Load 218
-             220:  133(ivec2)   VectorShuffle 219 219 0 1
-             221:  133(ivec2)   ExtInst 1(GLSL.std.450) 0(Unknown) 220
-             222:    134(ptr)   AccessChain 51(data) 217 74
-             223:   47(ivec4)   Load 222
-             224:   47(ivec4)   VectorShuffle 223 221 4 5 2 3
-                                Store 222 224
-             225:      6(int)   Load 8(invocation)
-             226:    134(ptr)   AccessChain 51(data) 74 74
-             227:   47(ivec4)   Load 226
-             228:  144(ivec3)   VectorShuffle 227 227 0 1 2
-             229:  144(ivec3)   ExtInst 1(GLSL.std.450) 0(Unknown) 228
-             230:    134(ptr)   AccessChain 51(data) 225 74
-             231:   47(ivec4)   Load 230
-             232:   47(ivec4)   VectorShuffle 231 229 4 5 6 3
-                                Store 230 232
-             233:      6(int)   Load 8(invocation)
-             234:    134(ptr)   AccessChain 51(data) 85 74
-             235:   47(ivec4)   Load 234
-             236:   47(ivec4)   ExtInst 1(GLSL.std.450) 0(Unknown) 235
-             237:    134(ptr)   AccessChain 51(data) 233 74
-                                Store 237 236
-                                Branch 42
-              42:             Label
+             161:   38(ivec4)   GroupBroadcast 63 159 160
+             162:    138(ptr)   AccessChain 55(data) 157 79
+                                Store 162 161
+                                Branch 47
+             163:               Label
+             164:      6(int)   Load 8(invocation)
+             165:     59(ptr)   AccessChain 55(data) 57 57 58
+             166:   48(float)   Load 165
+             167:   48(float)   SubgroupFirstInvocationKHR 166
+             168:     59(ptr)   AccessChain 55(data) 164 57 58
+                                Store 168 167
+             169:      6(int)   Load 8(invocation)
+             170:     69(ptr)   AccessChain 55(data) 67 57
+             171:   49(fvec4)   Load 170
+             172:   68(fvec2)   VectorShuffle 171 171 0 1
+             173:   68(fvec2)   SubgroupFirstInvocationKHR 172
+             174:     69(ptr)   AccessChain 55(data) 169 57
+             175:   49(fvec4)   Load 174
+             176:   49(fvec4)   VectorShuffle 175 173 4 5 2 3
+                                Store 174 176
+             177:      6(int)   Load 8(invocation)
+             178:     69(ptr)   AccessChain 55(data) 79 57
+             179:   49(fvec4)   Load 178
+             180:   80(fvec3)   VectorShuffle 179 179 0 1 2
+             181:   80(fvec3)   SubgroupFirstInvocationKHR 180
+             182:     69(ptr)   AccessChain 55(data) 177 57
+             183:   49(fvec4)   Load 182
+             184:   49(fvec4)   VectorShuffle 183 181 4 5 6 3
+                                Store 182 184
+             185:      6(int)   Load 8(invocation)
+             186:     69(ptr)   AccessChain 55(data) 90 57
+             187:   49(fvec4)   Load 186
+             188:   49(fvec4)   SubgroupFirstInvocationKHR 187
+             189:     69(ptr)   AccessChain 55(data) 185 57
+                                Store 189 188
+             190:      6(int)   Load 8(invocation)
+             191:     97(ptr)   AccessChain 55(data) 57 67 58
+             192:     50(int)   Load 191
+             193:     50(int)   SubgroupFirstInvocationKHR 192
+             194:     97(ptr)   AccessChain 55(data) 190 67 58
+                                Store 194 193
+             195:      6(int)   Load 8(invocation)
+             196:    105(ptr)   AccessChain 55(data) 67 67
+             197:   51(ivec4)   Load 196
+             198:  104(ivec2)   VectorShuffle 197 197 0 1
+             199:  104(ivec2)   SubgroupFirstInvocationKHR 198
+             200:    105(ptr)   AccessChain 55(data) 195 67
+             201:   51(ivec4)   Load 200
+             202:   51(ivec4)   VectorShuffle 201 199 4 5 2 3
+                                Store 200 202
+             203:      6(int)   Load 8(invocation)
+             204:    105(ptr)   AccessChain 55(data) 79 67
+             205:   51(ivec4)   Load 204
+             206:  115(ivec3)   VectorShuffle 205 205 0 1 2
+             207:  115(ivec3)   SubgroupFirstInvocationKHR 206
+             208:    105(ptr)   AccessChain 55(data) 203 67
+             209:   51(ivec4)   Load 208
+             210:   51(ivec4)   VectorShuffle 209 207 4 5 6 3
+                                Store 208 210
+             211:      6(int)   Load 8(invocation)
+             212:    105(ptr)   AccessChain 55(data) 90 67
+             213:   51(ivec4)   Load 212
+             214:   51(ivec4)   SubgroupFirstInvocationKHR 213
+             215:    105(ptr)   AccessChain 55(data) 211 67
+                                Store 215 214
+             216:      6(int)   Load 8(invocation)
+             217:    131(ptr)   AccessChain 55(data) 57 79 58
+             218:      6(int)   Load 217
+             219:      6(int)   SubgroupFirstInvocationKHR 218
+             220:    131(ptr)   AccessChain 55(data) 216 79 58
+                                Store 220 219
+             221:      6(int)   Load 8(invocation)
+             222:    138(ptr)   AccessChain 55(data) 67 79
+             223:   38(ivec4)   Load 222
+             224:   42(ivec2)   VectorShuffle 223 223 0 1
+             225:   42(ivec2)   SubgroupFirstInvocationKHR 224
+             226:    138(ptr)   AccessChain 55(data) 221 79
+             227:   38(ivec4)   Load 226
+             228:   38(ivec4)   VectorShuffle 227 225 4 5 2 3
+                                Store 226 228
+             229:      6(int)   Load 8(invocation)
+             230:    138(ptr)   AccessChain 55(data) 79 79
+             231:   38(ivec4)   Load 230
+             232:  148(ivec3)   VectorShuffle 231 231 0 1 2
+             233:  148(ivec3)   SubgroupFirstInvocationKHR 232
+             234:    138(ptr)   AccessChain 55(data) 229 79
+             235:   38(ivec4)   Load 234
+             236:   38(ivec4)   VectorShuffle 235 233 4 5 6 3
+                                Store 234 236
+             237:      6(int)   Load 8(invocation)
+             238:    138(ptr)   AccessChain 55(data) 90 79
+             239:   38(ivec4)   Load 238
+             240:   38(ivec4)   SubgroupFirstInvocationKHR 239
+             241:    138(ptr)   AccessChain 55(data) 237 79
+                                Store 241 240
+                                Branch 47
+              47:             Label
                               Return
                               FunctionEnd
diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp
index 6d2e9c0..9578d45 100644
--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -3862,7 +3862,6 @@
             symbolTable.setFunctionExtensions("readInvocationARB",      1, &E_GL_ARB_shader_ballot);
             symbolTable.setFunctionExtensions("readFirstInvocationARB", 1, &E_GL_ARB_shader_ballot);
 
-            BuiltInVariable("gl_SubGroupSizeARB",       EbvSubGroupSize,       symbolTable);
             BuiltInVariable("gl_SubGroupInvocationARB", EbvSubGroupInvocation, symbolTable);
             BuiltInVariable("gl_SubGroupEqMaskARB",     EbvSubGroupEqMask,     symbolTable);
             BuiltInVariable("gl_SubGroupGeMaskARB",     EbvSubGroupGeMask,     symbolTable);
@@ -3870,6 +3869,10 @@
             BuiltInVariable("gl_SubGroupLeMaskARB",     EbvSubGroupLeMask,     symbolTable);
             BuiltInVariable("gl_SubGroupLtMaskARB",     EbvSubGroupLtMask,     symbolTable);
 
+            if (spvVersion.vulkan >= 100)
+                // Treat "gl_SubGroupSizeARB" as shader input instead of uniform for Vulkan
+                SpecialQualifier("gl_SubGroupSizeARB", EvqVaryingIn, EbvSubGroupSize, symbolTable);
+
             symbolTable.setFunctionExtensions("anyInvocationARB",       1, &E_GL_ARB_shader_group_vote);
             symbolTable.setFunctionExtensions("allInvocationsARB",      1, &E_GL_ARB_shader_group_vote);
             symbolTable.setFunctionExtensions("allInvocationsEqualARB", 1, &E_GL_ARB_shader_group_vote);