Implement extension GL_NV_shader_atomic_int64
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 9dec286..54dc61f 100755
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -4749,12 +4749,12 @@
case glslang::EOpAtomicMin:
case glslang::EOpImageAtomicMin:
case glslang::EOpAtomicCounterMin:
- opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMin : spv::OpAtomicSMin;
+ opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMin : spv::OpAtomicSMin;
break;
case glslang::EOpAtomicMax:
case glslang::EOpImageAtomicMax:
case glslang::EOpAtomicCounterMax:
- opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMax : spv::OpAtomicSMax;
+ opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMax : spv::OpAtomicSMax;
break;
case glslang::EOpAtomicAnd:
case glslang::EOpImageAtomicAnd:
@@ -4795,6 +4795,9 @@
break;
}
+ if (typeProxy == glslang::EbtInt64 || typeProxy == glslang::EbtUint64)
+ builder.addCapability(spv::CapabilityInt64Atomics); // an atomic on a 64-bit integer type makes the module declare Int64Atomics
+
// Sort out the operands
// - mapping from glslang -> SPV
// - there are extra SPV operands with no glslang source
diff --git a/Test/baseResults/spv.atomicInt64.comp.out b/Test/baseResults/spv.atomicInt64.comp.out
new file mode 100644
index 0000000..31a1d63
--- /dev/null
+++ b/Test/baseResults/spv.atomicInt64.comp.out
@@ -0,0 +1,215 @@
+spv.atomicInt64.comp
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 149
+
+ Capability Shader
+ Capability Int64
+ Capability Int64Atomics
+ 1: ExtInstImport "GLSL.std.450"
+ MemoryModel Logical GLSL450
+ EntryPoint GLCompute 4 "main"
+ ExecutionMode 4 LocalSize 16 16 1
+ Source GLSL 450
+ SourceExtension "GL_ARB_gpu_shader_int64"
+ SourceExtension "GL_NV_shader_atomic_int64"
+ Name 4 "main"
+ Name 8 "i64"
+ Name 12 "u64"
+ Name 14 "Buffer"
+ MemberName 14(Buffer) 0 "i64"
+ MemberName 14(Buffer) 1 "u64"
+ Name 16 "buf"
+ Name 84 "Struct"
+ MemberName 84(Struct) 0 "i64"
+ MemberName 84(Struct) 1 "u64"
+ Name 86 "s"
+ MemberDecorate 14(Buffer) 0 Offset 0
+ MemberDecorate 14(Buffer) 1 Offset 8
+ Decorate 14(Buffer) BufferBlock
+ Decorate 16(buf) DescriptorSet 0
+ Decorate 16(buf) Binding 0
+ Decorate 148 BuiltIn WorkgroupSize
+ 2: TypeVoid
+ 3: TypeFunction 2
+ 6: TypeInt 64 1
+ 7: TypePointer Function 6(int)
+ 9: 6(int) Constant 0 0
+ 10: TypeInt 64 0
+ 11: TypePointer Function 10(int)
+ 13: 10(int) Constant 0 0
+ 14(Buffer): TypeStruct 6(int) 10(int)
+ 15: TypePointer Uniform 14(Buffer)
+ 16(buf): 15(ptr) Variable Uniform
+ 17: TypeInt 32 1
+ 18: 17(int) Constant 0
+ 19: TypePointer Uniform 6(int)
+ 21: 6(int) Constant 4294967272 4294967295
+ 22: TypeInt 32 0
+ 23: 22(int) Constant 1
+ 24: 22(int) Constant 0
+ 28: 17(int) Constant 1
+ 29: TypePointer Uniform 10(int)
+ 31: 10(int) Constant 15 0
+ 84(Struct): TypeStruct 6(int) 10(int)
+ 85: TypePointer Workgroup 84(Struct)
+ 86(s): 85(ptr) Variable Workgroup
+ 87: TypePointer Workgroup 6(int)
+ 92: TypePointer Workgroup 10(int)
+ 146: TypeVector 22(int) 3
+ 147: 22(int) Constant 16
+ 148: 146(ivec3) ConstantComposite 147 147 23
+ 4(main): 2 Function None 3
+ 5: Label
+ 8(i64): 7(ptr) Variable Function
+ 12(u64): 11(ptr) Variable Function
+ Store 8(i64) 9
+ Store 12(u64) 13
+ 20: 19(ptr) AccessChain 16(buf) 18
+ 25: 6(int) AtomicSMin 20 23 24 21
+ 26: 6(int) Load 8(i64)
+ 27: 6(int) IAdd 26 25
+ Store 8(i64) 27
+ 30: 29(ptr) AccessChain 16(buf) 28
+ 32: 10(int) AtomicUMin 30 23 24 31
+ 33: 10(int) Load 12(u64)
+ 34: 10(int) IAdd 33 32
+ Store 12(u64) 34
+ 35: 19(ptr) AccessChain 16(buf) 18
+ 36: 6(int) AtomicSMax 35 23 24 21
+ 37: 6(int) Load 8(i64)
+ 38: 6(int) IAdd 37 36
+ Store 8(i64) 38
+ 39: 29(ptr) AccessChain 16(buf) 28
+ 40: 10(int) AtomicUMax 39 23 24 31
+ 41: 10(int) Load 12(u64)
+ 42: 10(int) IAdd 41 40
+ Store 12(u64) 42
+ 43: 19(ptr) AccessChain 16(buf) 18
+ 44: 6(int) AtomicAnd 43 23 24 21
+ 45: 6(int) Load 8(i64)
+ 46: 6(int) IAdd 45 44
+ Store 8(i64) 46
+ 47: 29(ptr) AccessChain 16(buf) 28
+ 48: 10(int) AtomicAnd 47 23 24 31
+ 49: 10(int) Load 12(u64)
+ 50: 10(int) IAdd 49 48
+ Store 12(u64) 50
+ 51: 19(ptr) AccessChain 16(buf) 18
+ 52: 6(int) AtomicOr 51 23 24 21
+ 53: 6(int) Load 8(i64)
+ 54: 6(int) IAdd 53 52
+ Store 8(i64) 54
+ 55: 29(ptr) AccessChain 16(buf) 28
+ 56: 10(int) AtomicOr 55 23 24 31
+ 57: 10(int) Load 12(u64)
+ 58: 10(int) IAdd 57 56
+ Store 12(u64) 58
+ 59: 19(ptr) AccessChain 16(buf) 18
+ 60: 6(int) AtomicXor 59 23 24 21
+ 61: 6(int) Load 8(i64)
+ 62: 6(int) IAdd 61 60
+ Store 8(i64) 62
+ 63: 29(ptr) AccessChain 16(buf) 28
+ 64: 10(int) AtomicXor 63 23 24 31
+ 65: 10(int) Load 12(u64)
+ 66: 10(int) IAdd 65 64
+ Store 12(u64) 66
+ 67: 19(ptr) AccessChain 16(buf) 18
+ 68: 6(int) AtomicIAdd 67 23 24 21
+ 69: 6(int) Load 8(i64)
+ 70: 6(int) IAdd 69 68
+ Store 8(i64) 70
+ 71: 19(ptr) AccessChain 16(buf) 18
+ 72: 6(int) AtomicExchange 71 23 24 21
+ 73: 6(int) Load 8(i64)
+ 74: 6(int) IAdd 73 72
+ Store 8(i64) 74
+ 75: 19(ptr) AccessChain 16(buf) 18
+ 76: 6(int) Load 8(i64)
+ 77: 6(int) AtomicCompareExchange 75 23 24 24 76 21
+ 78: 6(int) Load 8(i64)
+ 79: 6(int) IAdd 78 77
+ Store 8(i64) 79
+ 80: 6(int) Load 8(i64)
+ 81: 19(ptr) AccessChain 16(buf) 18
+ Store 81 80
+ 82: 10(int) Load 12(u64)
+ 83: 29(ptr) AccessChain 16(buf) 28
+ Store 83 82
+ Store 8(i64) 9
+ Store 12(u64) 13
+ 88: 87(ptr) AccessChain 86(s) 18
+ 89: 6(int) AtomicSMin 88 23 24 21
+ 90: 6(int) Load 8(i64)
+ 91: 6(int) IAdd 90 89
+ Store 8(i64) 91
+ 93: 92(ptr) AccessChain 86(s) 28
+ 94: 10(int) AtomicUMin 93 23 24 31
+ 95: 10(int) Load 12(u64)
+ 96: 10(int) IAdd 95 94
+ Store 12(u64) 96
+ 97: 87(ptr) AccessChain 86(s) 18
+ 98: 6(int) AtomicSMax 97 23 24 21
+ 99: 6(int) Load 8(i64)
+ 100: 6(int) IAdd 99 98
+ Store 8(i64) 100
+ 101: 92(ptr) AccessChain 86(s) 28
+ 102: 10(int) AtomicUMax 101 23 24 31
+ 103: 10(int) Load 12(u64)
+ 104: 10(int) IAdd 103 102
+ Store 12(u64) 104
+ 105: 87(ptr) AccessChain 86(s) 18
+ 106: 6(int) AtomicAnd 105 23 24 21
+ 107: 6(int) Load 8(i64)
+ 108: 6(int) IAdd 107 106
+ Store 8(i64) 108
+ 109: 92(ptr) AccessChain 86(s) 28
+ 110: 10(int) AtomicAnd 109 23 24 31
+ 111: 10(int) Load 12(u64)
+ 112: 10(int) IAdd 111 110
+ Store 12(u64) 112
+ 113: 87(ptr) AccessChain 86(s) 18
+ 114: 6(int) AtomicOr 113 23 24 21
+ 115: 6(int) Load 8(i64)
+ 116: 6(int) IAdd 115 114
+ Store 8(i64) 116
+ 117: 92(ptr) AccessChain 86(s) 28
+ 118: 10(int) AtomicOr 117 23 24 31
+ 119: 10(int) Load 12(u64)
+ 120: 10(int) IAdd 119 118
+ Store 12(u64) 120
+ 121: 87(ptr) AccessChain 86(s) 18
+ 122: 6(int) AtomicXor 121 23 24 21
+ 123: 6(int) Load 8(i64)
+ 124: 6(int) IAdd 123 122
+ Store 8(i64) 124
+ 125: 92(ptr) AccessChain 86(s) 28
+ 126: 10(int) AtomicXor 125 23 24 31
+ 127: 10(int) Load 12(u64)
+ 128: 10(int) IAdd 127 126
+ Store 12(u64) 128
+ 129: 87(ptr) AccessChain 86(s) 18
+ 130: 6(int) AtomicIAdd 129 23 24 21
+ 131: 6(int) Load 8(i64)
+ 132: 6(int) IAdd 131 130
+ Store 8(i64) 132
+ 133: 87(ptr) AccessChain 86(s) 18
+ 134: 6(int) AtomicExchange 133 23 24 21
+ 135: 6(int) Load 8(i64)
+ 136: 6(int) IAdd 135 134
+ Store 8(i64) 136
+ 137: 87(ptr) AccessChain 86(s) 18
+ 138: 6(int) Load 8(i64)
+ 139: 6(int) AtomicCompareExchange 137 23 24 24 138 21
+ 140: 6(int) Load 8(i64)
+ 141: 6(int) IAdd 140 139
+ Store 8(i64) 141
+ 142: 6(int) Load 8(i64)
+ 143: 87(ptr) AccessChain 86(s) 18
+ Store 143 142
+ 144: 10(int) Load 12(u64)
+ 145: 92(ptr) AccessChain 86(s) 28
+ Store 145 144
+ Return
+ FunctionEnd
diff --git a/Test/spv.atomicInt64.comp b/Test/spv.atomicInt64.comp
new file mode 100644
index 0000000..a56c7ec
--- /dev/null
+++ b/Test/spv.atomicInt64.comp
@@ -0,0 +1,79 @@
+#version 450 core
+
+#extension GL_ARB_gpu_shader_int64: enable
+#extension GL_NV_shader_atomic_int64: enable
+
+layout(local_size_x = 16, local_size_y = 16) in;
+
+layout(binding = 0) buffer Buffer // buffer block at binding 0; its members are the atomic targets in the first half of main()
+{
+ int64_t i64; // signed 64-bit target
+ uint64_t u64; // unsigned 64-bit target
+} buf;
+
+struct Struct // same two 64-bit members as Buffer, used for the shared variable declared below
+{
+ int64_t i64;
+ uint64_t u64;
+};
+
+shared Struct s; // shared instance; atomic target in the second half of main()
+
+void main() // runs the 64-bit atomic built-ins against buf, then repeats the same sequence against s
+{
+ const int64_t i64c = -24; // negative operand passed to every int64_t overload
+ const uint64_t u64c = 0xF00000000F; // NOTE(review): no 'ul' suffix; the recorded SPIR-V constant for this operand is "15 0", so the upper bits appear to be dropped - confirm intent
+
+ // Test shader storage block (each returned value is accumulated into i64 / u64)
+ int64_t i64 = 0;
+ uint64_t u64 = 0;
+
+ i64 += atomicMin(buf.i64, i64c);
+ u64 += atomicMin(buf.u64, u64c);
+
+ i64 += atomicMax(buf.i64, i64c);
+ u64 += atomicMax(buf.u64, u64c);
+
+ i64 += atomicAnd(buf.i64, i64c);
+ u64 += atomicAnd(buf.u64, u64c);
+
+ i64 += atomicOr(buf.i64, i64c);
+ u64 += atomicOr(buf.u64, u64c);
+
+ i64 += atomicXor(buf.i64, i64c);
+ u64 += atomicXor(buf.u64, u64c);
+
+ i64 += atomicAdd(buf.i64, i64c); // add, exchange and compSwap are exercised with int64_t only
+ i64 += atomicExchange(buf.i64, i64c);
+ i64 += atomicCompSwap(buf.i64, i64c, i64); // compare value is i64c; the running i64 is the value to store
+
+ buf.i64 = i64; // accumulated results are written back with plain stores
+ buf.u64 = u64;
+
+ // Test shared variable (same sequence as above, with s.* as the target)
+ i64 = 0;
+ u64 = 0;
+
+ i64 += atomicMin(s.i64, i64c);
+ u64 += atomicMin(s.u64, u64c);
+
+ i64 += atomicMax(s.i64, i64c);
+ u64 += atomicMax(s.u64, u64c);
+
+ i64 += atomicAnd(s.i64, i64c);
+ u64 += atomicAnd(s.u64, u64c);
+
+ i64 += atomicOr(s.i64, i64c);
+ u64 += atomicOr(s.u64, u64c);
+
+ i64 += atomicXor(s.i64, i64c);
+ u64 += atomicXor(s.u64, u64c);
+
+ i64 += atomicAdd(s.i64, i64c); // add, exchange and compSwap are exercised with int64_t only
+ i64 += atomicExchange(s.i64, i64c);
+ i64 += atomicCompSwap(s.i64, i64c, i64); // compare value is i64c; the running i64 is the value to store
+
+ s.i64 = i64; // accumulated results are written back with plain stores
+ s.u64 = u64;
+}
+
diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp
index f8138ff..a837efd 100644
--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -923,6 +923,32 @@
"\n");
}
+#ifdef NV_EXTENSIONS
+ if (profile != EEsProfile && version >= 440) { // non-ES profiles, GLSL 4.40 and later
+ commonBuiltins.append( // 64-bit atomic prototypes are appended as GLSL source text
+ "uint64_t atomicMin(coherent volatile inout uint64_t, uint64_t);"
+ " int64_t atomicMin(coherent volatile inout int64_t, int64_t);"
+
+ "uint64_t atomicMax(coherent volatile inout uint64_t, uint64_t);"
+ " int64_t atomicMax(coherent volatile inout int64_t, int64_t);"
+
+ "uint64_t atomicAnd(coherent volatile inout uint64_t, uint64_t);"
+ " int64_t atomicAnd(coherent volatile inout int64_t, int64_t);"
+
+ "uint64_t atomicOr (coherent volatile inout uint64_t, uint64_t);"
+ " int64_t atomicOr (coherent volatile inout int64_t, int64_t);"
+
+ "uint64_t atomicXor(coherent volatile inout uint64_t, uint64_t);"
+ " int64_t atomicXor(coherent volatile inout int64_t, int64_t);"
+
+ " int64_t atomicAdd(coherent volatile inout int64_t, int64_t);" // NOTE(review): add/exchange/compSwap get int64_t overloads only - confirm uint64_t is intentionally omitted
+ " int64_t atomicExchange(coherent volatile inout int64_t, int64_t);"
+ " int64_t atomicCompSwap(coherent volatile inout int64_t, int64_t, int64_t);"
+
+ "\n");
+ }
+#endif
+
if ((profile == EEsProfile && version >= 310) ||
(profile != EEsProfile && version >= 450)) {
commonBuiltins.append(
diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp
index 7f721a0..33e35e8 100644
--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
@@ -1551,6 +1551,23 @@
break;
}
+#ifdef NV_EXTENSIONS
+ case EOpAtomicAdd:
+ case EOpAtomicMin:
+ case EOpAtomicMax:
+ case EOpAtomicAnd:
+ case EOpAtomicOr:
+ case EOpAtomicXor:
+ case EOpAtomicExchange:
+ case EOpAtomicCompSwap:
+ { // all eight atomic ops share one check; only the first argument's basic type is inspected
+ if (arg0->getType().getBasicType() == EbtInt64 || arg0->getType().getBasicType() == EbtUint64)
+ requireExtensions(loc, 1, &E_GL_NV_shader_atomic_int64, fnCandidate.getName().c_str()); // 64-bit operand: require GL_NV_shader_atomic_int64; the built-in's name is passed along, presumably for the diagnostic
+
+ break;
+ }
+#endif
+
case EOpInterpolateAtCentroid:
case EOpInterpolateAtSample:
case EOpInterpolateAtOffset:
diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp
index 807fe42..b1893b3 100644
--- a/glslang/MachineIndependent/Versions.cpp
+++ b/glslang/MachineIndependent/Versions.cpp
@@ -211,6 +211,7 @@
extensionBehavior[E_GL_NV_viewport_array2] = EBhDisable;
extensionBehavior[E_GL_NV_stereo_view_rendering] = EBhDisable;
extensionBehavior[E_GL_NVX_multiview_per_view_attributes] = EBhDisable;
+ extensionBehavior[E_GL_NV_shader_atomic_int64] = EBhDisable;
#endif
// AEP
@@ -343,6 +344,7 @@
"#define GL_NV_sample_mask_override_coverage 1\n"
"#define GL_NV_geometry_shader_passthrough 1\n"
"#define GL_NV_viewport_array2 1\n"
+ "#define GL_NV_shader_atomic_int64 1\n"
#endif
;
diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h
index 9399e9d..bd57103 100644
--- a/glslang/MachineIndependent/Versions.h
+++ b/glslang/MachineIndependent/Versions.h
@@ -182,6 +182,7 @@
const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2";
const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering";
const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes";
+const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64";
// Arrays of extensions for the above viewportEXTs duplications
diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp
index 4cfcf48..c37e460 100644
--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -410,7 +410,7 @@
"spv.int16.frag",
"spv.shaderBallotAMD.comp",
"spv.shaderFragMaskAMD.frag",
- "spv.textureGatherBiasLod.frag"
+ "spv.textureGatherBiasLod.frag",
})),
FileNameAsCustomTestSuffix
);
@@ -428,6 +428,7 @@
"spv.stereoViewRendering.tesc",
"spv.multiviewPerViewAttributes.vert",
"spv.multiviewPerViewAttributes.tesc",
+ "spv.atomicInt64.comp",
})),
FileNameAsCustomTestSuffix
);