Add HLSL memory barrier intrinsics, fix dst, add lit & EvaluateAttributeSnapped
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 03faf07..ceea1e2 100755
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -1405,6 +1405,10 @@
     case glslang::EOpMemoryBarrierImage:
     case glslang::EOpMemoryBarrierShared:
     case glslang::EOpGroupMemoryBarrier:
+    case glslang::EOpAllMemoryBarrierWithGroupSync:
+    case glslang::EOpGroupMemoryBarrierWithGroupSync:
+    case glslang::EOpWorkgroupMemoryBarrier:
+    case glslang::EOpWorkgroupMemoryBarrierWithGroupSync:
         noReturnValue = true;
         // These all have 0 operands and will naturally finish up in the code below for 0 operands
         break;
@@ -3986,6 +3990,21 @@
     case glslang::EOpGroupMemoryBarrier:
         builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsCrossWorkgroupMemoryMask);
         return 0;
+    case glslang::EOpAllMemoryBarrierWithGroupSync:
+        // Control barrier with non-"None" semantic is also a memory barrier.
+        builder.createControlBarrier(spv::ScopeDevice, spv::ScopeDevice, spv::MemorySemanticsAllMemory);
+        return 0;
+    case glslang::EOpGroupMemoryBarrierWithGroupSync:
+        // Control barrier with non-"None" semantic is also a memory barrier.
+        builder.createControlBarrier(spv::ScopeDevice, spv::ScopeDevice, spv::MemorySemanticsCrossWorkgroupMemoryMask);
+        return 0;
+    case glslang::EOpWorkgroupMemoryBarrier:
+        builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask);
+        return 0;
+    case glslang::EOpWorkgroupMemoryBarrierWithGroupSync:
+        // Control barrier with non-"None" semantic is also a memory barrier.
+        builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask);
+        return 0;
     default:
         logger->missingFunctionality("unknown operation with no arguments");
         return 0;
diff --git a/Test/baseResults/hlsl.intrinsics.barriers.comp.out b/Test/baseResults/hlsl.intrinsics.barriers.comp.out
new file mode 100644
index 0000000..ba8a04b
--- /dev/null
+++ b/Test/baseResults/hlsl.intrinsics.barriers.comp.out
@@ -0,0 +1,70 @@
+hlsl.intrinsics.barriers.comp
+Shader version: 450
+local_size = (1, 1, 1)
+0:? Sequence
+0:14  Function Definition: ComputeShaderFunction( (temp float)
+0:3    Function Parameters: 
+0:?     Sequence
+0:4      MemoryBarrier (global void)
+0:5      AllMemoryBarrierWithGroupSync (global void)
+0:6      GroupMemoryBarrier (global void)
+0:7      GroupMemoryBarrierWithGroupSync (global void)
+0:8      WorkgroupMemoryBarrier (global void)
+0:9      WorkgroupMemoryBarrierWithGroupSync (global void)
+0:11      Branch: Return with expression
+0:11        Constant:
+0:11          0.000000
+0:?   Linker Objects
+
+
+Linked compute stage:
+
+
+Shader version: 450
+local_size = (1, 1, 1)
+0:? Sequence
+0:14  Function Definition: ComputeShaderFunction( (temp float)
+0:3    Function Parameters: 
+0:?     Sequence
+0:4      MemoryBarrier (global void)
+0:5      AllMemoryBarrierWithGroupSync (global void)
+0:6      GroupMemoryBarrier (global void)
+0:7      GroupMemoryBarrierWithGroupSync (global void)
+0:8      WorkgroupMemoryBarrier (global void)
+0:9      WorkgroupMemoryBarrierWithGroupSync (global void)
+0:11      Branch: Return with expression
+0:11        Constant:
+0:11          0.000000
+0:?   Linker Objects
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 15
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "ComputeShaderFunction"
+                              ExecutionMode 4 LocalSize 1 1 1
+                              Source HLSL 450
+                              Name 4  "ComputeShaderFunction"
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:      6(int) Constant 1
+               8:      6(int) Constant 4048
+               9:      6(int) Constant 512
+              10:      6(int) Constant 2
+              11:      6(int) Constant 256
+              12:             TypeFloat 32
+              13:   12(float) Constant 0
+4(ComputeShaderFunction):           2 Function None 3
+               5:             Label
+                              MemoryBarrier 7 8
+                              ControlBarrier 7 7 8
+                              MemoryBarrier 7 9
+                              ControlBarrier 7 7 9
+                              MemoryBarrier 10 11
+                              ControlBarrier 10 10 11
+                              ReturnValue 13
+                              FunctionEnd
diff --git a/Test/baseResults/hlsl.intrinsics.evalfns.frag.out b/Test/baseResults/hlsl.intrinsics.evalfns.frag.out
new file mode 100644
index 0000000..1c22b25
--- /dev/null
+++ b/Test/baseResults/hlsl.intrinsics.evalfns.frag.out
@@ -0,0 +1,160 @@
+hlsl.intrinsics.evalfns.frag
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:11  Function Definition: main(f1;vf2;vf3;vf4;vi2; (temp void)
+0:3    Function Parameters: 
+0:3      'inF1' (in float)
+0:3      'inF2' (in 2-component vector of float)
+0:3      'inF3' (in 3-component vector of float)
+0:3      'inF4' (in 4-component vector of float)
+0:3      'inI2' (in 2-component vector of int)
+0:?     Sequence
+0:4      interpolateAtOffset (temp float)
+0:4        'inF1' (in float)
+0:?         Constant:
+0:?           -0.500000
+0:?           -0.062500
+0:5      interpolateAtOffset (temp 2-component vector of float)
+0:5        'inF2' (in 2-component vector of float)
+0:?         Constant:
+0:?           0.000000
+0:?           0.062500
+0:6      interpolateAtOffset (temp 3-component vector of float)
+0:6        'inF3' (in 3-component vector of float)
+0:?         Constant:
+0:?           0.187500
+0:?           -0.375000
+0:7      interpolateAtOffset (temp 4-component vector of float)
+0:7        'inF4' (in 4-component vector of float)
+0:?         Constant:
+0:?           0.437500
+0:?           -0.500000
+0:9      interpolateAtOffset (temp float)
+0:9        'inF1' (in float)
+0:9        vector-scale (temp 2-component vector of float)
+0:9          Convert int to float (temp 2-component vector of float)
+0:9            right-shift (temp 2-component vector of int)
+0:9              left-shift (temp 2-component vector of int)
+0:9                'inI2' (in 2-component vector of int)
+0:9                Constant:
+0:9                  28 (const int)
+0:9              Constant:
+0:9                28 (const int)
+0:9          Constant:
+0:9            0.062500
+0:?   Linker Objects
+
+
+Linked fragment stage:
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:11  Function Definition: main(f1;vf2;vf3;vf4;vi2; (temp void)
+0:3    Function Parameters: 
+0:3      'inF1' (in float)
+0:3      'inF2' (in 2-component vector of float)
+0:3      'inF3' (in 3-component vector of float)
+0:3      'inF4' (in 4-component vector of float)
+0:3      'inI2' (in 2-component vector of int)
+0:?     Sequence
+0:4      interpolateAtOffset (temp float)
+0:4        'inF1' (in float)
+0:?         Constant:
+0:?           -0.500000
+0:?           -0.062500
+0:5      interpolateAtOffset (temp 2-component vector of float)
+0:5        'inF2' (in 2-component vector of float)
+0:?         Constant:
+0:?           0.000000
+0:?           0.062500
+0:6      interpolateAtOffset (temp 3-component vector of float)
+0:6        'inF3' (in 3-component vector of float)
+0:?         Constant:
+0:?           0.187500
+0:?           -0.375000
+0:7      interpolateAtOffset (temp 4-component vector of float)
+0:7        'inF4' (in 4-component vector of float)
+0:?         Constant:
+0:?           0.437500
+0:?           -0.500000
+0:9      interpolateAtOffset (temp float)
+0:9        'inF1' (in float)
+0:9        vector-scale (temp 2-component vector of float)
+0:9          Convert int to float (temp 2-component vector of float)
+0:9            right-shift (temp 2-component vector of int)
+0:9              left-shift (temp 2-component vector of int)
+0:9                'inI2' (in 2-component vector of int)
+0:9                Constant:
+0:9                  28 (const int)
+0:9              Constant:
+0:9                28 (const int)
+0:9          Constant:
+0:9            0.062500
+0:?   Linker Objects
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 46
+
+                              Capability Shader
+                              Capability InterpolationFunction
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 8 15 22 29 36
+                              ExecutionMode 4 OriginUpperLeft
+                              Source HLSL 450
+                              Name 4  "main"
+                              Name 8  "inF1"
+                              Name 15  "inF2"
+                              Name 22  "inF3"
+                              Name 29  "inF4"
+                              Name 36  "inI2"
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypePointer Input 6(float)
+         8(inF1):      7(ptr) Variable Input
+               9:             TypeVector 6(float) 2
+              10:    6(float) Constant 3204448256
+              11:    6(float) Constant 3179282432
+              12:    9(fvec2) ConstantComposite 10 11
+              14:             TypePointer Input 9(fvec2)
+        15(inF2):     14(ptr) Variable Input
+              16:    6(float) Constant 0
+              17:    6(float) Constant 1031798784
+              18:    9(fvec2) ConstantComposite 16 17
+              20:             TypeVector 6(float) 3
+              21:             TypePointer Input 20(fvec3)
+        22(inF3):     21(ptr) Variable Input
+              23:    6(float) Constant 1044381696
+              24:    6(float) Constant 3200253952
+              25:    9(fvec2) ConstantComposite 23 24
+              27:             TypeVector 6(float) 4
+              28:             TypePointer Input 27(fvec4)
+        29(inF4):     28(ptr) Variable Input
+              30:    6(float) Constant 1054867456
+              31:    9(fvec2) ConstantComposite 30 10
+              33:             TypeInt 32 1
+              34:             TypeVector 33(int) 2
+              35:             TypePointer Input 34(ivec2)
+        36(inI2):     35(ptr) Variable Input
+              38:     33(int) Constant 28
+         4(main):           2 Function None 3
+               5:             Label
+              13:    6(float) ExtInst 1(GLSL.std.450) 78(InterpolateAtOffset) 8(inF1) 12
+              19:    9(fvec2) ExtInst 1(GLSL.std.450) 78(InterpolateAtOffset) 15(inF2) 18
+              26:   20(fvec3) ExtInst 1(GLSL.std.450) 78(InterpolateAtOffset) 22(inF3) 25
+              32:   27(fvec4) ExtInst 1(GLSL.std.450) 78(InterpolateAtOffset) 29(inF4) 31
+              37:   34(ivec2) Load 36(inI2)
+              39:   34(ivec2) CompositeConstruct 38 38
+              40:   34(ivec2) ShiftLeftLogical 37 39
+              41:   34(ivec2) CompositeConstruct 38 38
+              42:   34(ivec2) ShiftRightArithmetic 40 41
+              43:    9(fvec2) ConvertSToF 42
+              44:    9(fvec2) VectorTimesScalar 43 17
+              45:    6(float) ExtInst 1(GLSL.std.450) 78(InterpolateAtOffset) 8(inF1) 44
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/hlsl.intrinsics.f1632.frag.out b/Test/baseResults/hlsl.intrinsics.f1632.frag.out
new file mode 100644
index 0000000..a4930ec
--- /dev/null
+++ b/Test/baseResults/hlsl.intrinsics.f1632.frag.out
@@ -0,0 +1,129 @@
+hlsl.intrinsics.f1632.frag
+ERROR: 0:3: 'f32tof16' : unimplemented intrinsic: handle natively 
+ERROR: 0:16: 'f32tof16' : unimplemented intrinsic: handle natively 
+ERROR: 0:23: 'f32tof16' : unimplemented intrinsic: handle natively 
+ERROR: 0:30: 'f32tof16' : unimplemented intrinsic: handle natively 
+ERROR: 4 compilation errors.  No code generated.
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+ERROR: node is still EOpNull!
+0:8  Function Definition: PixelShaderFunction(f1; (temp float)
+0:2    Function Parameters: 
+0:2      'inF0' (in float)
+0:?     Sequence
+0:3      ERROR: Bad unary op
+ (global uint)
+0:3        'inF0' (in float)
+0:5      Branch: Return with expression
+0:5        Constant:
+0:5          0.000000
+0:14  Function Definition: PixelShaderFunction(vf1; (temp 1-component vector of float)
+0:9    Function Parameters: 
+0:9      'inF0' (in 1-component vector of float)
+0:?     Sequence
+0:11      Branch: Return with expression
+0:11        Constant:
+0:11          0.000000
+0:21  Function Definition: PixelShaderFunction(vf2; (temp 2-component vector of float)
+0:15    Function Parameters: 
+0:15      'inF0' (in 2-component vector of float)
+0:?     Sequence
+0:16      ERROR: Bad unary op
+ (global 2-component vector of uint)
+0:16        'inF0' (in 2-component vector of float)
+0:18      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:28  Function Definition: PixelShaderFunction(vf3; (temp 3-component vector of float)
+0:22    Function Parameters: 
+0:22      'inF0' (in 3-component vector of float)
+0:?     Sequence
+0:23      ERROR: Bad unary op
+ (global 3-component vector of uint)
+0:23        'inF0' (in 3-component vector of float)
+0:25      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:?           3.000000
+0:35  Function Definition: PixelShaderFunction(vf4; (temp 4-component vector of float)
+0:29    Function Parameters: 
+0:29      'inF0' (in 4-component vector of float)
+0:?     Sequence
+0:30      ERROR: Bad unary op
+ (global 4-component vector of uint)
+0:30        'inF0' (in 4-component vector of float)
+0:32      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:?           3.000000
+0:?           4.000000
+0:?   Linker Objects
+
+
+Linked fragment stage:
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+ERROR: node is still EOpNull!
+0:8  Function Definition: PixelShaderFunction(f1; (temp float)
+0:2    Function Parameters: 
+0:2      'inF0' (in float)
+0:?     Sequence
+0:3      ERROR: Bad unary op
+ (global uint)
+0:3        'inF0' (in float)
+0:5      Branch: Return with expression
+0:5        Constant:
+0:5          0.000000
+0:14  Function Definition: PixelShaderFunction(vf1; (temp 1-component vector of float)
+0:9    Function Parameters: 
+0:9      'inF0' (in 1-component vector of float)
+0:?     Sequence
+0:11      Branch: Return with expression
+0:11        Constant:
+0:11          0.000000
+0:21  Function Definition: PixelShaderFunction(vf2; (temp 2-component vector of float)
+0:15    Function Parameters: 
+0:15      'inF0' (in 2-component vector of float)
+0:?     Sequence
+0:16      ERROR: Bad unary op
+ (global 2-component vector of uint)
+0:16        'inF0' (in 2-component vector of float)
+0:18      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:28  Function Definition: PixelShaderFunction(vf3; (temp 3-component vector of float)
+0:22    Function Parameters: 
+0:22      'inF0' (in 3-component vector of float)
+0:?     Sequence
+0:23      ERROR: Bad unary op
+ (global 3-component vector of uint)
+0:23        'inF0' (in 3-component vector of float)
+0:25      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:?           3.000000
+0:35  Function Definition: PixelShaderFunction(vf4; (temp 4-component vector of float)
+0:29    Function Parameters: 
+0:29      'inF0' (in 4-component vector of float)
+0:?     Sequence
+0:30      ERROR: Bad unary op
+ (global 4-component vector of uint)
+0:30        'inF0' (in 4-component vector of float)
+0:32      Branch: Return with expression
+0:?         Constant:
+0:?           1.000000
+0:?           2.000000
+0:?           3.000000
+0:?           4.000000
+0:?   Linker Objects
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/hlsl.intrinsics.frag.out b/Test/baseResults/hlsl.intrinsics.frag.out
index 1b886d1..c280bbd 100644
--- a/Test/baseResults/hlsl.intrinsics.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.frag.out
@@ -638,7 +638,7 @@
 0:281      dot-product (global float)
 0:281        'inF0' (in 4-component vector of float)
 0:281        'inF1' (in 4-component vector of float)
-0:282      Construct vec4 (temp float)
+0:282      Construct vec4 (temp 4-component vector of float)
 0:282        Constant:
 0:282          1.000000
 0:282        component-wise multiply (temp float)
@@ -2088,7 +2088,7 @@
 0:281      dot-product (global float)
 0:281        'inF0' (in 4-component vector of float)
 0:281        'inF1' (in 4-component vector of float)
-0:282      Construct vec4 (temp float)
+0:282      Construct vec4 (temp 4-component vector of float)
 0:282        Constant:
 0:282          1.000000
 0:282        component-wise multiply (temp float)
@@ -2897,13 +2897,13 @@
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 1264
+// Id's are bound by 1265
 
                               Capability Shader
                               Capability DerivativeControl
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Fragment 4  "PixelShaderFunction" 48 67 73 80 195 213 219 226 366 384 390 397 539 557 563 570 718 732 747 856 870 885 997 1011 1026
+                              EntryPoint Fragment 4  "PixelShaderFunction" 48 67 73 80 195 213 219 226 366 384 390 397 539 557 563 570 719 733 748 857 871 886 998 1012 1027
                               ExecutionMode 4 OriginUpperLeft
                               Source HLSL 450
                               Name 4  "PixelShaderFunction"
@@ -2947,58 +2947,58 @@
                               Name 557  "inU0"
                               Name 563  "inF1"
                               Name 570  "inF2"
-                              Name 634  "ResType"
-                              Name 718  "inF0"
-                              Name 732  "inF1"
-                              Name 747  "inF2"
-                              Name 790  "ResType"
-                              Name 856  "inF0"
-                              Name 870  "inF1"
-                              Name 885  "inF2"
-                              Name 931  "ResType"
-                              Name 997  "inF0"
-                              Name 1011  "inF1"
-                              Name 1026  "inF2"
-                              Name 1075  "ResType"
-                              Name 1140  "r0"
-                              Name 1144  "r1"
-                              Name 1148  "r2"
-                              Name 1152  "r3"
-                              Name 1156  "r4"
-                              Name 1160  "r5"
-                              Name 1164  "r6"
-                              Name 1168  "r7"
-                              Name 1172  "r8"
-                              Name 1176  "r0"
-                              Name 1180  "r1"
-                              Name 1184  "r2"
-                              Name 1188  "r3"
-                              Name 1192  "r4"
-                              Name 1196  "r5"
-                              Name 1200  "r6"
-                              Name 1204  "r7"
-                              Name 1208  "r8"
-                              Name 1212  "r0"
-                              Name 1216  "r1"
-                              Name 1220  "r2"
-                              Name 1224  "r3"
-                              Name 1228  "r4"
-                              Name 1232  "r5"
-                              Name 1236  "r6"
-                              Name 1240  "r7"
-                              Name 1244  "r8"
-                              Name 1249  "gs_ua"
-                              Name 1250  "gs_ub"
-                              Name 1251  "gs_uc"
-                              Name 1253  "gs_ua2"
-                              Name 1254  "gs_ub2"
-                              Name 1255  "gs_uc2"
-                              Name 1257  "gs_ua3"
-                              Name 1258  "gs_ub3"
-                              Name 1259  "gs_uc3"
-                              Name 1261  "gs_ua4"
-                              Name 1262  "gs_ub4"
-                              Name 1263  "gs_uc4"
+                              Name 635  "ResType"
+                              Name 719  "inF0"
+                              Name 733  "inF1"
+                              Name 748  "inF2"
+                              Name 791  "ResType"
+                              Name 857  "inF0"
+                              Name 871  "inF1"
+                              Name 886  "inF2"
+                              Name 932  "ResType"
+                              Name 998  "inF0"
+                              Name 1012  "inF1"
+                              Name 1027  "inF2"
+                              Name 1076  "ResType"
+                              Name 1141  "r0"
+                              Name 1145  "r1"
+                              Name 1149  "r2"
+                              Name 1153  "r3"
+                              Name 1157  "r4"
+                              Name 1161  "r5"
+                              Name 1165  "r6"
+                              Name 1169  "r7"
+                              Name 1173  "r8"
+                              Name 1177  "r0"
+                              Name 1181  "r1"
+                              Name 1185  "r2"
+                              Name 1189  "r3"
+                              Name 1193  "r4"
+                              Name 1197  "r5"
+                              Name 1201  "r6"
+                              Name 1205  "r7"
+                              Name 1209  "r8"
+                              Name 1213  "r0"
+                              Name 1217  "r1"
+                              Name 1221  "r2"
+                              Name 1225  "r3"
+                              Name 1229  "r4"
+                              Name 1233  "r5"
+                              Name 1237  "r6"
+                              Name 1241  "r7"
+                              Name 1245  "r8"
+                              Name 1250  "gs_ua"
+                              Name 1251  "gs_ub"
+                              Name 1252  "gs_uc"
+                              Name 1254  "gs_ua2"
+                              Name 1255  "gs_ub2"
+                              Name 1256  "gs_uc2"
+                              Name 1258  "gs_ua3"
+                              Name 1259  "gs_ub3"
+                              Name 1260  "gs_uc3"
+                              Name 1262  "gs_ua4"
+                              Name 1263  "gs_ub4"
+                              Name 1264  "gs_uc4"
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -3078,42 +3078,42 @@
              574:   34(fvec4) ConstantComposite 84 84 84 84
              575:             TypeVector 50(bool) 4
              585:  554(ivec4) ConstantComposite 93 241 412 159
-    634(ResType):             TypeStruct 34(fvec4) 551(ivec4)
-             678:     64(int) Constant 4
-             679:  554(ivec4) ConstantComposite 327 159 241 678
-             714:    6(float) Constant 1082130432
-             715:   34(fvec4) ConstantComposite 156 325 535 714
-             717:             TypePointer Input 10
-       718(inF0):    717(ptr) Variable Input
-       732(inF1):    717(ptr) Variable Input
-             738:          10 ConstantComposite 230 230
-             739:             TypeMatrix 231(bvec2) 2
-       747(inF2):    717(ptr) Variable Input
-    790(ResType):             TypeStruct 10 207(ivec2)
-             852:    8(fvec2) ConstantComposite 325 325
-             853:          10 ConstantComposite 852 852
-             855:             TypePointer Input 23
-       856(inF0):    855(ptr) Variable Input
-       870(inF1):    855(ptr) Variable Input
-             876:          23 ConstantComposite 401 401 401
-             877:             TypeMatrix 402(bvec3) 3
-       885(inF2):    855(ptr) Variable Input
-    931(ResType):             TypeStruct 23 378(ivec3)
-             993:   21(fvec3) ConstantComposite 535 535 535
-             994:          23 ConstantComposite 993 993 993
-             996:             TypePointer Input 36
-       997(inF0):    996(ptr) Variable Input
-      1011(inF1):    996(ptr) Variable Input
-            1017:          36 ConstantComposite 574 574 574 574
-            1018:             TypeMatrix 575(bvec4) 4
-      1026(inF2):    996(ptr) Variable Input
-   1075(ResType):             TypeStruct 36 551(ivec4)
-            1137:   34(fvec4) ConstantComposite 714 714 714 714
-            1138:          36 ConstantComposite 1137 1137 1137 1137
-            1248:             TypePointer Function 64(int)
-            1252:             TypePointer Function 210(ivec2)
-            1256:             TypePointer Function 381(ivec3)
-            1260:             TypePointer Function 554(ivec4)
+    635(ResType):             TypeStruct 34(fvec4) 551(ivec4)
+             679:     64(int) Constant 4
+             680:  554(ivec4) ConstantComposite 327 159 241 679
+             715:    6(float) Constant 1082130432
+             716:   34(fvec4) ConstantComposite 156 325 535 715
+             718:             TypePointer Input 10
+       719(inF0):    718(ptr) Variable Input
+       733(inF1):    718(ptr) Variable Input
+             739:          10 ConstantComposite 230 230
+             740:             TypeMatrix 231(bvec2) 2
+       748(inF2):    718(ptr) Variable Input
+    791(ResType):             TypeStruct 10 207(ivec2)
+             853:    8(fvec2) ConstantComposite 325 325
+             854:          10 ConstantComposite 853 853
+             856:             TypePointer Input 23
+       857(inF0):    856(ptr) Variable Input
+       871(inF1):    856(ptr) Variable Input
+             877:          23 ConstantComposite 401 401 401
+             878:             TypeMatrix 402(bvec3) 3
+       886(inF2):    856(ptr) Variable Input
+    932(ResType):             TypeStruct 23 378(ivec3)
+             994:   21(fvec3) ConstantComposite 535 535 535
+             995:          23 ConstantComposite 994 994 994
+             997:             TypePointer Input 36
+       998(inF0):    997(ptr) Variable Input
+      1012(inF1):    997(ptr) Variable Input
+            1018:          36 ConstantComposite 574 574 574 574
+            1019:             TypeMatrix 575(bvec4) 4
+      1027(inF2):    997(ptr) Variable Input
+   1076(ResType):             TypeStruct 36 551(ivec4)
+            1138:   34(fvec4) ConstantComposite 715 715 715 715
+            1139:          36 ConstantComposite 1138 1138 1138 1138
+            1249:             TypePointer Function 64(int)
+            1253:             TypePointer Function 210(ivec2)
+            1257:             TypePointer Function 381(ivec3)
+            1261:             TypePointer Function 554(ivec4)
 4(PixelShaderFunction):           2 Function None 3
                5:             Label
               49:    6(float) Load 48(inF0)
@@ -3260,51 +3260,51 @@
        17(inFM0):     11(ptr) FunctionParameter
        18(inFM1):     11(ptr) FunctionParameter
               20:             Label
-        1140(r0):      7(ptr) Variable Function
-        1144(r1):      9(ptr) Variable Function
-        1148(r2):      9(ptr) Variable Function
-        1152(r3):      7(ptr) Variable Function
-        1156(r4):      9(ptr) Variable Function
-        1160(r5):      9(ptr) Variable Function
-        1164(r6):     11(ptr) Variable Function
-        1168(r7):     11(ptr) Variable Function
-        1172(r8):     11(ptr) Variable Function
-            1141:    6(float) Load 13(inF0)
-            1142:    6(float) Load 14(inF1)
-            1143:    6(float) FMul 1141 1142
-                              Store 1140(r0) 1143
-            1145:    8(fvec2) Load 15(inFV0)
-            1146:    6(float) Load 13(inF0)
-            1147:    8(fvec2) VectorTimesScalar 1145 1146
-                              Store 1144(r1) 1147
-            1149:    6(float) Load 13(inF0)
-            1150:    8(fvec2) Load 15(inFV0)
-            1151:    8(fvec2) VectorTimesScalar 1150 1149
-                              Store 1148(r2) 1151
-            1153:    8(fvec2) Load 15(inFV0)
-            1154:    8(fvec2) Load 16(inFV1)
-            1155:    6(float) Dot 1153 1154
-                              Store 1152(r3) 1155
-            1157:          10 Load 17(inFM0)
-            1158:    8(fvec2) Load 15(inFV0)
-            1159:    8(fvec2) MatrixTimesVector 1157 1158
-                              Store 1156(r4) 1159
-            1161:    8(fvec2) Load 15(inFV0)
-            1162:          10 Load 17(inFM0)
-            1163:    8(fvec2) VectorTimesMatrix 1161 1162
-                              Store 1160(r5) 1163
-            1165:          10 Load 17(inFM0)
-            1166:    6(float) Load 13(inF0)
-            1167:          10 MatrixTimesScalar 1165 1166
-                              Store 1164(r6) 1167
-            1169:    6(float) Load 13(inF0)
-            1170:          10 Load 17(inFM0)
-            1171:          10 MatrixTimesScalar 1170 1169
-                              Store 1168(r7) 1171
-            1173:          10 Load 17(inFM0)
-            1174:          10 Load 18(inFM1)
-            1175:          10 MatrixTimesMatrix 1173 1174
-                              Store 1172(r8) 1175
+        1141(r0):      7(ptr) Variable Function
+        1145(r1):      9(ptr) Variable Function
+        1149(r2):      9(ptr) Variable Function
+        1153(r3):      7(ptr) Variable Function
+        1157(r4):      9(ptr) Variable Function
+        1161(r5):      9(ptr) Variable Function
+        1165(r6):     11(ptr) Variable Function
+        1169(r7):     11(ptr) Variable Function
+        1173(r8):     11(ptr) Variable Function
+            1142:    6(float) Load 13(inF0)
+            1143:    6(float) Load 14(inF1)
+            1144:    6(float) FMul 1142 1143
+                              Store 1141(r0) 1144
+            1146:    8(fvec2) Load 15(inFV0)
+            1147:    6(float) Load 13(inF0)
+            1148:    8(fvec2) VectorTimesScalar 1146 1147
+                              Store 1145(r1) 1148
+            1150:    6(float) Load 13(inF0)
+            1151:    8(fvec2) Load 15(inFV0)
+            1152:    8(fvec2) VectorTimesScalar 1151 1150
+                              Store 1149(r2) 1152
+            1154:    8(fvec2) Load 15(inFV0)
+            1155:    8(fvec2) Load 16(inFV1)
+            1156:    6(float) Dot 1154 1155
+                              Store 1153(r3) 1156
+            1158:          10 Load 17(inFM0)
+            1159:    8(fvec2) Load 15(inFV0)
+            1160:    8(fvec2) MatrixTimesVector 1158 1159
+                              Store 1157(r4) 1160
+            1162:    8(fvec2) Load 15(inFV0)
+            1163:          10 Load 17(inFM0)
+            1164:    8(fvec2) VectorTimesMatrix 1162 1163
+                              Store 1161(r5) 1164
+            1166:          10 Load 17(inFM0)
+            1167:    6(float) Load 13(inF0)
+            1168:          10 MatrixTimesScalar 1166 1167
+                              Store 1165(r6) 1168
+            1170:    6(float) Load 13(inF0)
+            1171:          10 Load 17(inFM0)
+            1172:          10 MatrixTimesScalar 1171 1170
+                              Store 1169(r7) 1172
+            1174:          10 Load 17(inFM0)
+            1175:          10 Load 18(inFM1)
+            1176:          10 MatrixTimesMatrix 1174 1175
+                              Store 1173(r8) 1176
                               Return
                               FunctionEnd
 32(TestGenMul(f1;f1;vf3;vf3;mf33;mf33;):           2 Function None 25
@@ -3315,51 +3315,51 @@
        30(inFM0):     24(ptr) FunctionParameter
        31(inFM1):     24(ptr) FunctionParameter
               33:             Label
-        1176(r0):      7(ptr) Variable Function
-        1180(r1):     22(ptr) Variable Function
-        1184(r2):     22(ptr) Variable Function
-        1188(r3):      7(ptr) Variable Function
-        1192(r4):     22(ptr) Variable Function
-        1196(r5):     22(ptr) Variable Function
-        1200(r6):     24(ptr) Variable Function
-        1204(r7):     24(ptr) Variable Function
-        1208(r8):     24(ptr) Variable Function
-            1177:    6(float) Load 26(inF0)
-            1178:    6(float) Load 27(inF1)
-            1179:    6(float) FMul 1177 1178
-                              Store 1176(r0) 1179
-            1181:   21(fvec3) Load 28(inFV0)
-            1182:    6(float) Load 26(inF0)
-            1183:   21(fvec3) VectorTimesScalar 1181 1182
-                              Store 1180(r1) 1183
-            1185:    6(float) Load 26(inF0)
-            1186:   21(fvec3) Load 28(inFV0)
-            1187:   21(fvec3) VectorTimesScalar 1186 1185
-                              Store 1184(r2) 1187
-            1189:   21(fvec3) Load 28(inFV0)
-            1190:   21(fvec3) Load 29(inFV1)
-            1191:    6(float) Dot 1189 1190
-                              Store 1188(r3) 1191
-            1193:          23 Load 30(inFM0)
-            1194:   21(fvec3) Load 28(inFV0)
-            1195:   21(fvec3) MatrixTimesVector 1193 1194
-                              Store 1192(r4) 1195
-            1197:   21(fvec3) Load 28(inFV0)
-            1198:          23 Load 30(inFM0)
-            1199:   21(fvec3) VectorTimesMatrix 1197 1198
-                              Store 1196(r5) 1199
-            1201:          23 Load 30(inFM0)
-            1202:    6(float) Load 26(inF0)
-            1203:          23 MatrixTimesScalar 1201 1202
-                              Store 1200(r6) 1203
-            1205:    6(float) Load 26(inF0)
-            1206:          23 Load 30(inFM0)
-            1207:          23 MatrixTimesScalar 1206 1205
-                              Store 1204(r7) 1207
-            1209:          23 Load 30(inFM0)
-            1210:          23 Load 31(inFM1)
-            1211:          23 MatrixTimesMatrix 1209 1210
-                              Store 1208(r8) 1211
+        1177(r0):      7(ptr) Variable Function
+        1181(r1):     22(ptr) Variable Function
+        1185(r2):     22(ptr) Variable Function
+        1189(r3):      7(ptr) Variable Function
+        1193(r4):     22(ptr) Variable Function
+        1197(r5):     22(ptr) Variable Function
+        1201(r6):     24(ptr) Variable Function
+        1205(r7):     24(ptr) Variable Function
+        1209(r8):     24(ptr) Variable Function
+            1178:    6(float) Load 26(inF0)
+            1179:    6(float) Load 27(inF1)
+            1180:    6(float) FMul 1178 1179
+                              Store 1177(r0) 1180
+            1182:   21(fvec3) Load 28(inFV0)
+            1183:    6(float) Load 26(inF0)
+            1184:   21(fvec3) VectorTimesScalar 1182 1183
+                              Store 1181(r1) 1184
+            1186:    6(float) Load 26(inF0)
+            1187:   21(fvec3) Load 28(inFV0)
+            1188:   21(fvec3) VectorTimesScalar 1187 1186
+                              Store 1185(r2) 1188
+            1190:   21(fvec3) Load 28(inFV0)
+            1191:   21(fvec3) Load 29(inFV1)
+            1192:    6(float) Dot 1190 1191
+                              Store 1189(r3) 1192
+            1194:          23 Load 30(inFM0)
+            1195:   21(fvec3) Load 28(inFV0)
+            1196:   21(fvec3) MatrixTimesVector 1194 1195
+                              Store 1193(r4) 1196
+            1198:   21(fvec3) Load 28(inFV0)
+            1199:          23 Load 30(inFM0)
+            1200:   21(fvec3) VectorTimesMatrix 1198 1199
+                              Store 1197(r5) 1200
+            1202:          23 Load 30(inFM0)
+            1203:    6(float) Load 26(inF0)
+            1204:          23 MatrixTimesScalar 1202 1203
+                              Store 1201(r6) 1204
+            1206:    6(float) Load 26(inF0)
+            1207:          23 Load 30(inFM0)
+            1208:          23 MatrixTimesScalar 1207 1206
+                              Store 1205(r7) 1208
+            1210:          23 Load 30(inFM0)
+            1211:          23 Load 31(inFM1)
+            1212:          23 MatrixTimesMatrix 1210 1211
+                              Store 1209(r8) 1212
                               Return
                               FunctionEnd
 45(TestGenMul(f1;f1;vf4;vf4;mf44;mf44;):           2 Function None 38
@@ -3370,62 +3370,62 @@
        43(inFM0):     37(ptr) FunctionParameter
        44(inFM1):     37(ptr) FunctionParameter
               46:             Label
-        1212(r0):      7(ptr) Variable Function
-        1216(r1):     35(ptr) Variable Function
-        1220(r2):     35(ptr) Variable Function
-        1224(r3):      7(ptr) Variable Function
-        1228(r4):     35(ptr) Variable Function
-        1232(r5):     35(ptr) Variable Function
-        1236(r6):     37(ptr) Variable Function
-        1240(r7):     37(ptr) Variable Function
-        1244(r8):     37(ptr) Variable Function
-     1249(gs_ua):   1248(ptr) Variable Function
-     1250(gs_ub):   1248(ptr) Variable Function
-     1251(gs_uc):   1248(ptr) Variable Function
-    1253(gs_ua2):   1252(ptr) Variable Function
-    1254(gs_ub2):   1252(ptr) Variable Function
-    1255(gs_uc2):   1252(ptr) Variable Function
-    1257(gs_ua3):   1256(ptr) Variable Function
-    1258(gs_ub3):   1256(ptr) Variable Function
-    1259(gs_uc3):   1256(ptr) Variable Function
-    1261(gs_ua4):   1260(ptr) Variable Function
-    1262(gs_ub4):   1260(ptr) Variable Function
-    1263(gs_uc4):   1260(ptr) Variable Function
-            1213:    6(float) Load 39(inF0)
-            1214:    6(float) Load 40(inF1)
-            1215:    6(float) FMul 1213 1214
-                              Store 1212(r0) 1215
-            1217:   34(fvec4) Load 41(inFV0)
-            1218:    6(float) Load 39(inF0)
-            1219:   34(fvec4) VectorTimesScalar 1217 1218
-                              Store 1216(r1) 1219
-            1221:    6(float) Load 39(inF0)
-            1222:   34(fvec4) Load 41(inFV0)
-            1223:   34(fvec4) VectorTimesScalar 1222 1221
-                              Store 1220(r2) 1223
-            1225:   34(fvec4) Load 41(inFV0)
-            1226:   34(fvec4) Load 42(inFV1)
-            1227:    6(float) Dot 1225 1226
-                              Store 1224(r3) 1227
-            1229:          36 Load 43(inFM0)
-            1230:   34(fvec4) Load 41(inFV0)
-            1231:   34(fvec4) MatrixTimesVector 1229 1230
-                              Store 1228(r4) 1231
-            1233:   34(fvec4) Load 41(inFV0)
-            1234:          36 Load 43(inFM0)
-            1235:   34(fvec4) VectorTimesMatrix 1233 1234
-                              Store 1232(r5) 1235
-            1237:          36 Load 43(inFM0)
-            1238:    6(float) Load 39(inF0)
-            1239:          36 MatrixTimesScalar 1237 1238
-                              Store 1236(r6) 1239
-            1241:    6(float) Load 39(inF0)
-            1242:          36 Load 43(inFM0)
-            1243:          36 MatrixTimesScalar 1242 1241
-                              Store 1240(r7) 1243
-            1245:          36 Load 43(inFM0)
-            1246:          36 Load 44(inFM1)
-            1247:          36 MatrixTimesMatrix 1245 1246
-                              Store 1244(r8) 1247
+        1213(r0):      7(ptr) Variable Function
+        1217(r1):     35(ptr) Variable Function
+        1221(r2):     35(ptr) Variable Function
+        1225(r3):      7(ptr) Variable Function
+        1229(r4):     35(ptr) Variable Function
+        1233(r5):     35(ptr) Variable Function
+        1237(r6):     37(ptr) Variable Function
+        1241(r7):     37(ptr) Variable Function
+        1245(r8):     37(ptr) Variable Function
+     1250(gs_ua):   1249(ptr) Variable Function
+     1251(gs_ub):   1249(ptr) Variable Function
+     1252(gs_uc):   1249(ptr) Variable Function
+    1254(gs_ua2):   1253(ptr) Variable Function
+    1255(gs_ub2):   1253(ptr) Variable Function
+    1256(gs_uc2):   1253(ptr) Variable Function
+    1258(gs_ua3):   1257(ptr) Variable Function
+    1259(gs_ub3):   1257(ptr) Variable Function
+    1260(gs_uc3):   1257(ptr) Variable Function
+    1262(gs_ua4):   1261(ptr) Variable Function
+    1263(gs_ub4):   1261(ptr) Variable Function
+    1264(gs_uc4):   1261(ptr) Variable Function
+            1214:    6(float) Load 39(inF0)
+            1215:    6(float) Load 40(inF1)
+            1216:    6(float) FMul 1214 1215
+                              Store 1213(r0) 1216
+            1218:   34(fvec4) Load 41(inFV0)
+            1219:    6(float) Load 39(inF0)
+            1220:   34(fvec4) VectorTimesScalar 1218 1219
+                              Store 1217(r1) 1220
+            1222:    6(float) Load 39(inF0)
+            1223:   34(fvec4) Load 41(inFV0)
+            1224:   34(fvec4) VectorTimesScalar 1223 1222
+                              Store 1221(r2) 1224
+            1226:   34(fvec4) Load 41(inFV0)
+            1227:   34(fvec4) Load 42(inFV1)
+            1228:    6(float) Dot 1226 1227
+                              Store 1225(r3) 1228
+            1230:          36 Load 43(inFM0)
+            1231:   34(fvec4) Load 41(inFV0)
+            1232:   34(fvec4) MatrixTimesVector 1230 1231
+                              Store 1229(r4) 1232
+            1234:   34(fvec4) Load 41(inFV0)
+            1235:          36 Load 43(inFM0)
+            1236:   34(fvec4) VectorTimesMatrix 1234 1235
+                              Store 1233(r5) 1236
+            1238:          36 Load 43(inFM0)
+            1239:    6(float) Load 39(inF0)
+            1240:          36 MatrixTimesScalar 1238 1239
+                              Store 1237(r6) 1240
+            1242:    6(float) Load 39(inF0)
+            1243:          36 Load 43(inFM0)
+            1244:          36 MatrixTimesScalar 1243 1242
+                              Store 1241(r7) 1244
+            1246:          36 Load 43(inFM0)
+            1247:          36 Load 44(inFM1)
+            1248:          36 MatrixTimesMatrix 1246 1247
+                              Store 1245(r8) 1248
                               Return
                               FunctionEnd
diff --git a/Test/baseResults/hlsl.intrinsics.lit.frag.out b/Test/baseResults/hlsl.intrinsics.lit.frag.out
new file mode 100644
index 0000000..3a6becf
--- /dev/null
+++ b/Test/baseResults/hlsl.intrinsics.lit.frag.out
@@ -0,0 +1,134 @@
+hlsl.intrinsics.lit.frag
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:5  Function Definition: PixelShaderFunction(f1;f1;f1; (temp void)
+0:2    Function Parameters: 
+0:2      'n_dot_l' (in float)
+0:2      'n_dot_h' (in float)
+0:2      'm' (in float)
+0:?     Sequence
+0:3      move second child to first child (temp 4-component vector of float)
+0:3        'r0' (temp 4-component vector of float)
+0:3        Construct vec4 (temp 4-component vector of float)
+0:3          Constant:
+0:3            1.000000
+0:3          max (temp float)
+0:3            'n_dot_l' (in float)
+0:3            Constant:
+0:3              0.000000
+0:3          Test condition and select (temp float)
+0:3            Condition
+0:3            Compare Less Than (temp bool)
+0:3              min (temp float)
+0:3                'n_dot_l' (in float)
+0:3                'n_dot_h' (in float)
+0:3              Constant:
+0:3                0.000000
+0:3            true case
+0:3            Constant:
+0:3              0.000000
+0:3            false case
+0:3            component-wise multiply (temp float)
+0:3              'n_dot_h' (in float)
+0:3              'm' (in float)
+0:3          Constant:
+0:3            1.000000
+0:?   Linker Objects
+
+
+Linked fragment stage:
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:5  Function Definition: PixelShaderFunction(f1;f1;f1; (temp void)
+0:2    Function Parameters: 
+0:2      'n_dot_l' (in float)
+0:2      'n_dot_h' (in float)
+0:2      'm' (in float)
+0:?     Sequence
+0:3      move second child to first child (temp 4-component vector of float)
+0:3        'r0' (temp 4-component vector of float)
+0:3        Construct vec4 (temp 4-component vector of float)
+0:3          Constant:
+0:3            1.000000
+0:3          max (temp float)
+0:3            'n_dot_l' (in float)
+0:3            Constant:
+0:3              0.000000
+0:3          Test condition and select (temp float)
+0:3            Condition
+0:3            Compare Less Than (temp bool)
+0:3              min (temp float)
+0:3                'n_dot_l' (in float)
+0:3                'n_dot_h' (in float)
+0:3              Constant:
+0:3                0.000000
+0:3            true case
+0:3            Constant:
+0:3              0.000000
+0:3            false case
+0:3            component-wise multiply (temp float)
+0:3              'n_dot_h' (in float)
+0:3              'm' (in float)
+0:3          Constant:
+0:3            1.000000
+0:?   Linker Objects
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 33
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "PixelShaderFunction" 12 19 28
+                              ExecutionMode 4 OriginUpperLeft
+                              Source HLSL 450
+                              Name 4  "PixelShaderFunction"
+                              Name 9  "r0"
+                              Name 12  "n_dot_l"
+                              Name 19  "n_dot_h"
+                              Name 28  "m"
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Function 7(fvec4)
+              10:    6(float) Constant 1065353216
+              11:             TypePointer Input 6(float)
+     12(n_dot_l):     11(ptr) Variable Input
+              14:    6(float) Constant 0
+              16:             TypePointer Function 6(float)
+     19(n_dot_h):     11(ptr) Variable Input
+              22:             TypeBool
+           28(m):     11(ptr) Variable Input
+4(PixelShaderFunction):           2 Function None 3
+               5:             Label
+           9(r0):      8(ptr) Variable Function
+              17:     16(ptr) Variable Function
+              13:    6(float) Load 12(n_dot_l)
+              15:    6(float) ExtInst 1(GLSL.std.450) 40(FMax) 13 14
+              18:    6(float) Load 12(n_dot_l)
+              20:    6(float) Load 19(n_dot_h)
+              21:    6(float) ExtInst 1(GLSL.std.450) 37(FMin) 18 20
+              23:    22(bool) FOrdLessThan 21 14
+                              SelectionMerge 25 None
+                              BranchConditional 23 24 26
+              24:               Label
+                                Store 17 14
+                                Branch 25
+              26:               Label
+              27:    6(float)   Load 19(n_dot_h)
+              29:    6(float)   Load 28(m)
+              30:    6(float)   FMul 27 29
+                                Store 17 30
+                                Branch 25
+              25:             Label
+              31:    6(float) Load 17
+              32:    7(fvec4) CompositeConstruct 10 15 31 10
+                              Store 9(r0) 32
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/hlsl.intrinsics.negative.vert.out b/Test/baseResults/hlsl.intrinsics.negative.vert.out
index 23e4b24..b24ef69 100644
--- a/Test/baseResults/hlsl.intrinsics.negative.vert.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.vert.out
@@ -1,4 +1,6 @@
 hlsl.intrinsics.negative.vert
+ERROR: 0:18: 'AllMemoryBarrier' : no matching overloaded function found 
+ERROR: 0:19: 'AllMemoryBarrierWithGroupSync' : no matching overloaded function found 
 ERROR: 0:20: 'asdouble' : no matching overloaded function found 
 ERROR: 0:21: 'CheckAccessFullyMapped' : no matching overloaded function found 
 ERROR: 0:22: 'CheckAccessFullyMapped' : no matching overloaded function found 
@@ -6,6 +8,8 @@
 ERROR: 0:24: 'countbits' : no matching overloaded function found 
 ERROR: 0:25: 'cross' : no matching overloaded function found 
 ERROR: 0:26: 'D3DCOLORtoUBYTE4' : no matching overloaded function found 
+ERROR: 0:27: 'DeviceMemoryBarrier' : no matching overloaded function found 
+ERROR: 0:28: 'DeviceMemoryBarrierWithGroupSync' : no matching overloaded function found 
 ERROR: 0:29: 'ddx' : no matching overloaded function found 
 ERROR: 0:30: 'ddx_coarse' : no matching overloaded function found 
 ERROR: 0:31: 'ddx_fine' : no matching overloaded function found 
@@ -35,6 +39,8 @@
 ERROR: 0:55: 'InterlockedOr' : no matching overloaded function found 
 ERROR: 0:56: 'InterlockedXor' : no matching overloaded function found 
 ERROR: 0:57: 'InterlockedXor' : no matching overloaded function found 
+ERROR: 0:58: 'GroupMemoryBarrier' : no matching overloaded function found 
+ERROR: 0:59: 'GroupMemoryBarrierWithGroupSync' : no matching overloaded function found 
 ERROR: 0:60: 'length' : no matching overloaded function found 
 ERROR: 0:61: 'msad4' : no matching overloaded function found 
 ERROR: 0:62: 'normalize' : no matching overloaded function found 
@@ -226,7 +232,7 @@
 ERROR: 0:270: 'reflect' : no matching overloaded function found 
 ERROR: 0:270: 'refract' : no matching overloaded function found 
 ERROR: 0:270: 'reversebits' : no matching overloaded function found 
-ERROR: 227 compilation errors.  No code generated.
+ERROR: 233 compilation errors.  No code generated.
 
 
 Shader version: 450
@@ -238,6 +244,10 @@
 0:15      'inF2' (in float)
 0:15      'inI0' (in int)
 0:?     Sequence
+0:18      Constant:
+0:18        0.000000
+0:19      Constant:
+0:19        0.000000
 0:20      Constant:
 0:20        0.000000
 0:21      Constant:
@@ -252,6 +262,10 @@
 0:25        0.000000
 0:26      Constant:
 0:26        0.000000
+0:27      Constant:
+0:27        0.000000
+0:28      Constant:
+0:28        0.000000
 0:29      Constant:
 0:29        0.000000
 0:30      Constant:
@@ -310,6 +324,10 @@
 0:56        0.000000
 0:57      Constant:
 0:57        0.000000
+0:58      Constant:
+0:58        0.000000
+0:59      Constant:
+0:59        0.000000
 0:60      Constant:
 0:60        0.000000
 0:61      Constant:
@@ -821,6 +839,10 @@
 0:15      'inF2' (in float)
 0:15      'inI0' (in int)
 0:?     Sequence
+0:18      Constant:
+0:18        0.000000
+0:19      Constant:
+0:19        0.000000
 0:20      Constant:
 0:20        0.000000
 0:21      Constant:
@@ -835,6 +857,10 @@
 0:25        0.000000
 0:26      Constant:
 0:26        0.000000
+0:27      Constant:
+0:27        0.000000
+0:28      Constant:
+0:28        0.000000
 0:29      Constant:
 0:29        0.000000
 0:30      Constant:
@@ -893,6 +919,10 @@
 0:56        0.000000
 0:57      Constant:
 0:57        0.000000
+0:58      Constant:
+0:58        0.000000
+0:59      Constant:
+0:59        0.000000
 0:60      Constant:
 0:60        0.000000
 0:61      Constant:
diff --git a/Test/baseResults/hlsl.intrinsics.vert.out b/Test/baseResults/hlsl.intrinsics.vert.out
index ee99b6b..d4c9706 100644
--- a/Test/baseResults/hlsl.intrinsics.vert.out
+++ b/Test/baseResults/hlsl.intrinsics.vert.out
@@ -530,7 +530,7 @@
 0:227      dot-product (global float)
 0:227        'inF0' (in 4-component vector of float)
 0:227        'inF1' (in 4-component vector of float)
-0:228      Construct vec4 (temp float)
+0:228      Construct vec4 (temp 4-component vector of float)
 0:228        Constant:
 0:228          1.000000
 0:228        component-wise multiply (temp float)
@@ -1759,7 +1759,7 @@
 0:227      dot-product (global float)
 0:227        'inF0' (in 4-component vector of float)
 0:227        'inF1' (in 4-component vector of float)
-0:228      Construct vec4 (temp float)
+0:228      Construct vec4 (temp 4-component vector of float)
 0:228        Constant:
 0:228          1.000000
 0:228        component-wise multiply (temp float)
@@ -2455,12 +2455,12 @@
 
 // Module Version 10000
 // Generated by (magic number): 80001
-// Id's are bound by 1089
+// Id's are bound by 1090
 
                               Capability Shader
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint Vertex 4  "VertexShaderFunction" 48 67 73 80 174 192 198 205 321 339 345 352 470 488 494 501 625 639 646 741 755 762 860 874 881
+                              EntryPoint Vertex 4  "VertexShaderFunction" 48 67 73 80 174 192 198 205 321 339 345 352 470 488 494 501 626 640 647 742 756 763 861 875 882
                               Source HLSL 450
                               Name 4  "VertexShaderFunction"
                               Name 19  "TestGenMul(f1;f1;vf2;vf2;mf22;mf22;"
@@ -2503,46 +2503,46 @@
                               Name 488  "inU0"
                               Name 494  "inF1"
                               Name 501  "inF2"
-                              Name 545  "ResType"
-                              Name 625  "inF0"
-                              Name 639  "inF1"
-                              Name 646  "inF2"
-                              Name 677  "ResType"
-                              Name 741  "inF0"
-                              Name 755  "inF1"
-                              Name 762  "inF2"
-                              Name 796  "ResType"
-                              Name 860  "inF0"
-                              Name 874  "inF1"
-                              Name 881  "inF2"
-                              Name 918  "ResType"
-                              Name 981  "r0"
-                              Name 985  "r1"
-                              Name 989  "r2"
-                              Name 993  "r3"
-                              Name 997  "r4"
-                              Name 1001  "r5"
-                              Name 1005  "r6"
-                              Name 1009  "r7"
-                              Name 1013  "r8"
-                              Name 1017  "r0"
-                              Name 1021  "r1"
-                              Name 1025  "r2"
-                              Name 1029  "r3"
-                              Name 1033  "r4"
-                              Name 1037  "r5"
-                              Name 1041  "r6"
-                              Name 1045  "r7"
-                              Name 1049  "r8"
-                              Name 1053  "r0"
-                              Name 1057  "r1"
-                              Name 1061  "r2"
-                              Name 1065  "r3"
-                              Name 1069  "r4"
-                              Name 1073  "r5"
-                              Name 1077  "r6"
-                              Name 1081  "r7"
-                              Name 1085  "r8"
+                              Name 546  "ResType"
+                              Name 626  "inF0"
+                              Name 640  "inF1"
+                              Name 647  "inF2"
+                              Name 678  "ResType"
+                              Name 742  "inF0"
+                              Name 756  "inF1"
+                              Name 763  "inF2"
+                              Name 797  "ResType"
+                              Name 861  "inF0"
+                              Name 875  "inF1"
+                              Name 882  "inF2"
+                              Name 919  "ResType"
+                              Name 982  "r0"
+                              Name 986  "r1"
+                              Name 990  "r2"
+                              Name 994  "r3"
+                              Name 998  "r4"
+                              Name 1002  "r5"
+                              Name 1006  "r6"
+                              Name 1010  "r7"
+                              Name 1014  "r8"
+                              Name 1018  "r0"
+                              Name 1022  "r1"
+                              Name 1026  "r2"
+                              Name 1030  "r3"
+                              Name 1034  "r4"
+                              Name 1038  "r5"
+                              Name 1042  "r6"
+                              Name 1046  "r7"
+                              Name 1050  "r8"
+                              Name 1054  "r0"
+                              Name 1058  "r1"
+                              Name 1062  "r2"
+                              Name 1066  "r3"
+                              Name 1070  "r4"
+                              Name 1074  "r5"
+                              Name 1078  "r6"
+                              Name 1082  "r7"
+                              Name 1086  "r8"
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -2618,33 +2618,33 @@
        494(inF1):    469(ptr) Variable Input
        501(inF2):    469(ptr) Variable Input
              508:  485(ivec4) ConstantComposite 87 212 359 136
-    545(ResType):             TypeStruct 34(fvec4) 482(ivec4)
-             550:             TypeVector 50(bool) 4
-             585:     64(int) Constant 4
-             586:  485(ivec4) ConstantComposite 282 136 212 585
-             621:    6(float) Constant 1082130432
-             622:   34(fvec4) ConstantComposite 144 280 466 621
-             624:             TypePointer Input 10
-       625(inF0):    624(ptr) Variable Input
-       639(inF1):    624(ptr) Variable Input
-       646(inF2):    624(ptr) Variable Input
-    677(ResType):             TypeStruct 10 186(ivec2)
-             737:    8(fvec2) ConstantComposite 280 280
-             738:          10 ConstantComposite 737 737
-             740:             TypePointer Input 23
-       741(inF0):    740(ptr) Variable Input
-       755(inF1):    740(ptr) Variable Input
-       762(inF2):    740(ptr) Variable Input
-    796(ResType):             TypeStruct 23 333(ivec3)
-             856:   21(fvec3) ConstantComposite 466 466 466
-             857:          23 ConstantComposite 856 856 856
-             859:             TypePointer Input 36
-       860(inF0):    859(ptr) Variable Input
-       874(inF1):    859(ptr) Variable Input
-       881(inF2):    859(ptr) Variable Input
-    918(ResType):             TypeStruct 36 482(ivec4)
-             978:   34(fvec4) ConstantComposite 621 621 621 621
-             979:          36 ConstantComposite 978 978 978 978
+    546(ResType):             TypeStruct 34(fvec4) 482(ivec4)
+             551:             TypeVector 50(bool) 4
+             586:     64(int) Constant 4
+             587:  485(ivec4) ConstantComposite 282 136 212 586
+             622:    6(float) Constant 1082130432
+             623:   34(fvec4) ConstantComposite 144 280 466 622
+             625:             TypePointer Input 10
+       626(inF0):    625(ptr) Variable Input
+       640(inF1):    625(ptr) Variable Input
+       647(inF2):    625(ptr) Variable Input
+    678(ResType):             TypeStruct 10 186(ivec2)
+             738:    8(fvec2) ConstantComposite 280 280
+             739:          10 ConstantComposite 738 738
+             741:             TypePointer Input 23
+       742(inF0):    741(ptr) Variable Input
+       756(inF1):    741(ptr) Variable Input
+       763(inF2):    741(ptr) Variable Input
+    797(ResType):             TypeStruct 23 333(ivec3)
+             857:   21(fvec3) ConstantComposite 466 466 466
+             858:          23 ConstantComposite 857 857 857
+             860:             TypePointer Input 36
+       861(inF0):    860(ptr) Variable Input
+       875(inF1):    860(ptr) Variable Input
+       882(inF2):    860(ptr) Variable Input
+    919(ResType):             TypeStruct 36 482(ivec4)
+             979:   34(fvec4) ConstantComposite 622 622 622 622
+             980:          36 ConstantComposite 979 979 979 979
 4(VertexShaderFunction):           2 Function None 3
                5:             Label
               49:    6(float) Load 48(inF0)
@@ -2768,51 +2768,51 @@
        17(inFM0):     11(ptr) FunctionParameter
        18(inFM1):     11(ptr) FunctionParameter
               20:             Label
-         981(r0):      7(ptr) Variable Function
-         985(r1):      9(ptr) Variable Function
-         989(r2):      9(ptr) Variable Function
-         993(r3):      7(ptr) Variable Function
-         997(r4):      9(ptr) Variable Function
-        1001(r5):      9(ptr) Variable Function
-        1005(r6):     11(ptr) Variable Function
-        1009(r7):     11(ptr) Variable Function
-        1013(r8):     11(ptr) Variable Function
-             982:    6(float) Load 13(inF0)
-             983:    6(float) Load 14(inF1)
-             984:    6(float) FMul 982 983
-                              Store 981(r0) 984
-             986:    8(fvec2) Load 15(inFV0)
-             987:    6(float) Load 13(inF0)
-             988:    8(fvec2) VectorTimesScalar 986 987
-                              Store 985(r1) 988
-             990:    6(float) Load 13(inF0)
-             991:    8(fvec2) Load 15(inFV0)
-             992:    8(fvec2) VectorTimesScalar 991 990
-                              Store 989(r2) 992
-             994:    8(fvec2) Load 15(inFV0)
-             995:    8(fvec2) Load 16(inFV1)
-             996:    6(float) Dot 994 995
-                              Store 993(r3) 996
-             998:          10 Load 17(inFM0)
-             999:    8(fvec2) Load 15(inFV0)
-            1000:    8(fvec2) MatrixTimesVector 998 999
-                              Store 997(r4) 1000
-            1002:    8(fvec2) Load 15(inFV0)
-            1003:          10 Load 17(inFM0)
-            1004:    8(fvec2) VectorTimesMatrix 1002 1003
-                              Store 1001(r5) 1004
-            1006:          10 Load 17(inFM0)
-            1007:    6(float) Load 13(inF0)
-            1008:          10 MatrixTimesScalar 1006 1007
-                              Store 1005(r6) 1008
-            1010:    6(float) Load 13(inF0)
-            1011:          10 Load 17(inFM0)
-            1012:          10 MatrixTimesScalar 1011 1010
-                              Store 1009(r7) 1012
-            1014:          10 Load 17(inFM0)
-            1015:          10 Load 18(inFM1)
-            1016:          10 MatrixTimesMatrix 1014 1015
-                              Store 1013(r8) 1016
+         982(r0):      7(ptr) Variable Function
+         986(r1):      9(ptr) Variable Function
+         990(r2):      9(ptr) Variable Function
+         994(r3):      7(ptr) Variable Function
+         998(r4):      9(ptr) Variable Function
+        1002(r5):      9(ptr) Variable Function
+        1006(r6):     11(ptr) Variable Function
+        1010(r7):     11(ptr) Variable Function
+        1014(r8):     11(ptr) Variable Function
+             983:    6(float) Load 13(inF0)
+             984:    6(float) Load 14(inF1)
+             985:    6(float) FMul 983 984
+                              Store 982(r0) 985
+             987:    8(fvec2) Load 15(inFV0)
+             988:    6(float) Load 13(inF0)
+             989:    8(fvec2) VectorTimesScalar 987 988
+                              Store 986(r1) 989
+             991:    6(float) Load 13(inF0)
+             992:    8(fvec2) Load 15(inFV0)
+             993:    8(fvec2) VectorTimesScalar 992 991
+                              Store 990(r2) 993
+             995:    8(fvec2) Load 15(inFV0)
+             996:    8(fvec2) Load 16(inFV1)
+             997:    6(float) Dot 995 996
+                              Store 994(r3) 997
+             999:          10 Load 17(inFM0)
+            1000:    8(fvec2) Load 15(inFV0)
+            1001:    8(fvec2) MatrixTimesVector 999 1000
+                              Store 998(r4) 1001
+            1003:    8(fvec2) Load 15(inFV0)
+            1004:          10 Load 17(inFM0)
+            1005:    8(fvec2) VectorTimesMatrix 1003 1004
+                              Store 1002(r5) 1005
+            1007:          10 Load 17(inFM0)
+            1008:    6(float) Load 13(inF0)
+            1009:          10 MatrixTimesScalar 1007 1008
+                              Store 1006(r6) 1009
+            1011:    6(float) Load 13(inF0)
+            1012:          10 Load 17(inFM0)
+            1013:          10 MatrixTimesScalar 1012 1011
+                              Store 1010(r7) 1013
+            1015:          10 Load 17(inFM0)
+            1016:          10 Load 18(inFM1)
+            1017:          10 MatrixTimesMatrix 1015 1016
+                              Store 1014(r8) 1017
                               Return
                               FunctionEnd
 32(TestGenMul(f1;f1;vf3;vf3;mf33;mf33;):           2 Function None 25
@@ -2823,51 +2823,51 @@
        30(inFM0):     24(ptr) FunctionParameter
        31(inFM1):     24(ptr) FunctionParameter
               33:             Label
-        1017(r0):      7(ptr) Variable Function
-        1021(r1):     22(ptr) Variable Function
-        1025(r2):     22(ptr) Variable Function
-        1029(r3):      7(ptr) Variable Function
-        1033(r4):     22(ptr) Variable Function
-        1037(r5):     22(ptr) Variable Function
-        1041(r6):     24(ptr) Variable Function
-        1045(r7):     24(ptr) Variable Function
-        1049(r8):     24(ptr) Variable Function
-            1018:    6(float) Load 26(inF0)
-            1019:    6(float) Load 27(inF1)
-            1020:    6(float) FMul 1018 1019
-                              Store 1017(r0) 1020
-            1022:   21(fvec3) Load 28(inFV0)
-            1023:    6(float) Load 26(inF0)
-            1024:   21(fvec3) VectorTimesScalar 1022 1023
-                              Store 1021(r1) 1024
-            1026:    6(float) Load 26(inF0)
-            1027:   21(fvec3) Load 28(inFV0)
-            1028:   21(fvec3) VectorTimesScalar 1027 1026
-                              Store 1025(r2) 1028
-            1030:   21(fvec3) Load 28(inFV0)
-            1031:   21(fvec3) Load 29(inFV1)
-            1032:    6(float) Dot 1030 1031
-                              Store 1029(r3) 1032
-            1034:          23 Load 30(inFM0)
-            1035:   21(fvec3) Load 28(inFV0)
-            1036:   21(fvec3) MatrixTimesVector 1034 1035
-                              Store 1033(r4) 1036
-            1038:   21(fvec3) Load 28(inFV0)
-            1039:          23 Load 30(inFM0)
-            1040:   21(fvec3) VectorTimesMatrix 1038 1039
-                              Store 1037(r5) 1040
-            1042:          23 Load 30(inFM0)
-            1043:    6(float) Load 26(inF0)
-            1044:          23 MatrixTimesScalar 1042 1043
-                              Store 1041(r6) 1044
-            1046:    6(float) Load 26(inF0)
-            1047:          23 Load 30(inFM0)
-            1048:          23 MatrixTimesScalar 1047 1046
-                              Store 1045(r7) 1048
-            1050:          23 Load 30(inFM0)
-            1051:          23 Load 31(inFM1)
-            1052:          23 MatrixTimesMatrix 1050 1051
-                              Store 1049(r8) 1052
+        1018(r0):      7(ptr) Variable Function
+        1022(r1):     22(ptr) Variable Function
+        1026(r2):     22(ptr) Variable Function
+        1030(r3):      7(ptr) Variable Function
+        1034(r4):     22(ptr) Variable Function
+        1038(r5):     22(ptr) Variable Function
+        1042(r6):     24(ptr) Variable Function
+        1046(r7):     24(ptr) Variable Function
+        1050(r8):     24(ptr) Variable Function
+            1019:    6(float) Load 26(inF0)
+            1020:    6(float) Load 27(inF1)
+            1021:    6(float) FMul 1019 1020
+                              Store 1018(r0) 1021
+            1023:   21(fvec3) Load 28(inFV0)
+            1024:    6(float) Load 26(inF0)
+            1025:   21(fvec3) VectorTimesScalar 1023 1024
+                              Store 1022(r1) 1025
+            1027:    6(float) Load 26(inF0)
+            1028:   21(fvec3) Load 28(inFV0)
+            1029:   21(fvec3) VectorTimesScalar 1028 1027
+                              Store 1026(r2) 1029
+            1031:   21(fvec3) Load 28(inFV0)
+            1032:   21(fvec3) Load 29(inFV1)
+            1033:    6(float) Dot 1031 1032
+                              Store 1030(r3) 1033
+            1035:          23 Load 30(inFM0)
+            1036:   21(fvec3) Load 28(inFV0)
+            1037:   21(fvec3) MatrixTimesVector 1035 1036
+                              Store 1034(r4) 1037
+            1039:   21(fvec3) Load 28(inFV0)
+            1040:          23 Load 30(inFM0)
+            1041:   21(fvec3) VectorTimesMatrix 1039 1040
+                              Store 1038(r5) 1041
+            1043:          23 Load 30(inFM0)
+            1044:    6(float) Load 26(inF0)
+            1045:          23 MatrixTimesScalar 1043 1044
+                              Store 1042(r6) 1045
+            1047:    6(float) Load 26(inF0)
+            1048:          23 Load 30(inFM0)
+            1049:          23 MatrixTimesScalar 1048 1047
+                              Store 1046(r7) 1049
+            1051:          23 Load 30(inFM0)
+            1052:          23 Load 31(inFM1)
+            1053:          23 MatrixTimesMatrix 1051 1052
+                              Store 1050(r8) 1053
                               Return
                               FunctionEnd
 45(TestGenMul(f1;f1;vf4;vf4;mf44;mf44;):           2 Function None 38
@@ -2878,50 +2878,50 @@
        43(inFM0):     37(ptr) FunctionParameter
        44(inFM1):     37(ptr) FunctionParameter
               46:             Label
-        1053(r0):      7(ptr) Variable Function
-        1057(r1):     35(ptr) Variable Function
-        1061(r2):     35(ptr) Variable Function
-        1065(r3):      7(ptr) Variable Function
-        1069(r4):     35(ptr) Variable Function
-        1073(r5):     35(ptr) Variable Function
-        1077(r6):     37(ptr) Variable Function
-        1081(r7):     37(ptr) Variable Function
-        1085(r8):     37(ptr) Variable Function
-            1054:    6(float) Load 39(inF0)
-            1055:    6(float) Load 40(inF1)
-            1056:    6(float) FMul 1054 1055
-                              Store 1053(r0) 1056
-            1058:   34(fvec4) Load 41(inFV0)
-            1059:    6(float) Load 39(inF0)
-            1060:   34(fvec4) VectorTimesScalar 1058 1059
-                              Store 1057(r1) 1060
-            1062:    6(float) Load 39(inF0)
-            1063:   34(fvec4) Load 41(inFV0)
-            1064:   34(fvec4) VectorTimesScalar 1063 1062
-                              Store 1061(r2) 1064
-            1066:   34(fvec4) Load 41(inFV0)
-            1067:   34(fvec4) Load 42(inFV1)
-            1068:    6(float) Dot 1066 1067
-                              Store 1065(r3) 1068
-            1070:          36 Load 43(inFM0)
-            1071:   34(fvec4) Load 41(inFV0)
-            1072:   34(fvec4) MatrixTimesVector 1070 1071
-                              Store 1069(r4) 1072
-            1074:   34(fvec4) Load 41(inFV0)
-            1075:          36 Load 43(inFM0)
-            1076:   34(fvec4) VectorTimesMatrix 1074 1075
-                              Store 1073(r5) 1076
-            1078:          36 Load 43(inFM0)
-            1079:    6(float) Load 39(inF0)
-            1080:          36 MatrixTimesScalar 1078 1079
-                              Store 1077(r6) 1080
-            1082:    6(float) Load 39(inF0)
-            1083:          36 Load 43(inFM0)
-            1084:          36 MatrixTimesScalar 1083 1082
-                              Store 1081(r7) 1084
-            1086:          36 Load 43(inFM0)
-            1087:          36 Load 44(inFM1)
-            1088:          36 MatrixTimesMatrix 1086 1087
-                              Store 1085(r8) 1088
+        1054(r0):      7(ptr) Variable Function
+        1058(r1):     35(ptr) Variable Function
+        1062(r2):     35(ptr) Variable Function
+        1066(r3):      7(ptr) Variable Function
+        1070(r4):     35(ptr) Variable Function
+        1074(r5):     35(ptr) Variable Function
+        1078(r6):     37(ptr) Variable Function
+        1082(r7):     37(ptr) Variable Function
+        1086(r8):     37(ptr) Variable Function
+            1055:    6(float) Load 39(inF0)
+            1056:    6(float) Load 40(inF1)
+            1057:    6(float) FMul 1055 1056
+                              Store 1054(r0) 1057
+            1059:   34(fvec4) Load 41(inFV0)
+            1060:    6(float) Load 39(inF0)
+            1061:   34(fvec4) VectorTimesScalar 1059 1060
+                              Store 1058(r1) 1061
+            1063:    6(float) Load 39(inF0)
+            1064:   34(fvec4) Load 41(inFV0)
+            1065:   34(fvec4) VectorTimesScalar 1064 1063
+                              Store 1062(r2) 1065
+            1067:   34(fvec4) Load 41(inFV0)
+            1068:   34(fvec4) Load 42(inFV1)
+            1069:    6(float) Dot 1067 1068
+                              Store 1066(r3) 1069
+            1071:          36 Load 43(inFM0)
+            1072:   34(fvec4) Load 41(inFV0)
+            1073:   34(fvec4) MatrixTimesVector 1071 1072
+                              Store 1070(r4) 1073
+            1075:   34(fvec4) Load 41(inFV0)
+            1076:          36 Load 43(inFM0)
+            1077:   34(fvec4) VectorTimesMatrix 1075 1076
+                              Store 1074(r5) 1077
+            1079:          36 Load 43(inFM0)
+            1080:    6(float) Load 39(inF0)
+            1081:          36 MatrixTimesScalar 1079 1080
+                              Store 1078(r6) 1081
+            1083:    6(float) Load 39(inF0)
+            1084:          36 Load 43(inFM0)
+            1085:          36 MatrixTimesScalar 1084 1083
+                              Store 1082(r7) 1085
+            1087:          36 Load 43(inFM0)
+            1088:          36 Load 44(inFM1)
+            1089:          36 MatrixTimesMatrix 1087 1088
+                              Store 1086(r8) 1089
                               Return
                               FunctionEnd
diff --git a/Test/hlsl.intrinsics.barriers.comp b/Test/hlsl.intrinsics.barriers.comp
new file mode 100644
index 0000000..c9f6a8d
--- /dev/null
+++ b/Test/hlsl.intrinsics.barriers.comp
@@ -0,0 +1,13 @@
+
+float ComputeShaderFunction()  // calls each HLSL barrier intrinsic once, then returns a constant
+{
+    AllMemoryBarrier();                  // related to EOpMemoryBarrier in hlslParseables.cpp
+    AllMemoryBarrierWithGroupSync();     // related to EOpAllMemoryBarrierWithGroupSync
+    DeviceMemoryBarrier();               // related to EOpGroupMemoryBarrier (note the differing name)
+    DeviceMemoryBarrierWithGroupSync();  // related to EOpGroupMemoryBarrierWithGroupSync
+    GroupMemoryBarrier();                // related to EOpWorkgroupMemoryBarrier
+    GroupMemoryBarrierWithGroupSync();   // related to EOpWorkgroupMemoryBarrierWithGroupSync
+
+    return 0.0;
+}
+
diff --git a/Test/hlsl.intrinsics.evalfns.frag b/Test/hlsl.intrinsics.evalfns.frag
new file mode 100644
index 0000000..9638706
--- /dev/null
+++ b/Test/hlsl.intrinsics.evalfns.frag
@@ -0,0 +1,10 @@
+
+void main(float inF1, float2 inF2, float3 inF3, float4 inF4, int2 inI2) : COLOR
+{
+    EvaluateAttributeSnapped(inF1, int2(8,15));   // float value, literal int2 offset
+    EvaluateAttributeSnapped(inF2, int2(0,1));    // float2 value, literal int2 offset
+    EvaluateAttributeSnapped(inF3, int2(3,10));   // float3 value, literal int2 offset
+    EvaluateAttributeSnapped(inF4, int2(7,8));    // float4 value, literal int2 offset
+
+    EvaluateAttributeSnapped(inF1, inI2);         // offset comes from the inI2 parameter instead of a literal
+}
diff --git a/Test/hlsl.intrinsics.f1632.frag b/Test/hlsl.intrinsics.f1632.frag
new file mode 100644
index 0000000..4a68a67
--- /dev/null
+++ b/Test/hlsl.intrinsics.f1632.frag
@@ -0,0 +1,34 @@
+float PixelShaderFunction(float inF0)  // scalar float overload
+{
+    f32tof16(inF0);  // result is discarded
+
+    return 0.0;
+}
+
+float1 PixelShaderFunction(float1 inF0)  // float1 overload; makes no intrinsic call yet
+{
+    // TODO: ... add the intrinsic call when float1 prototypes are generated
+    return 0.0;
+}
+
+float2 PixelShaderFunction(float2 inF0)  // float2 overload
+{
+    f32tof16(inF0);  // result is discarded
+
+    return float2(1,2);
+}
+
+float3 PixelShaderFunction(float3 inF0)  // float3 overload
+{
+    f32tof16(inF0);  // result is discarded
+
+    return float3(1,2,3);
+}
+
+float4 PixelShaderFunction(float4 inF0)  // float4 overload
+{
+    f32tof16(inF0);  // result is discarded
+
+    return float4(1,2,3,4);
+}
+
diff --git a/Test/hlsl.intrinsics.lit.frag b/Test/hlsl.intrinsics.lit.frag
new file mode 100644
index 0000000..bf4069a
--- /dev/null
+++ b/Test/hlsl.intrinsics.lit.frag
@@ -0,0 +1,4 @@
+void PixelShaderFunction(float n_dot_l, float n_dot_h, float m)  // exercises the lit() intrinsic
+{
+    float4 r0 = lit(n_dot_l, n_dot_h, m);  // r0 is never read afterwards
+}
diff --git a/Test/hlsl.intrinsics.negative.vert b/Test/hlsl.intrinsics.negative.vert
index c37d8bc..e716c68 100644
--- a/Test/hlsl.intrinsics.negative.vert
+++ b/Test/hlsl.intrinsics.negative.vert
@@ -15,8 +15,8 @@
 {
     uint out_u1;
 
-    // AllMemoryBarrier();              // invalid in fragment stage  TODO: parser currently crashes on empty arg list
-    // AllMemoryBarrierWithGroupSync(); // invalid in fragment stage  TODO: parser currently crashes on empty arg list
+    AllMemoryBarrier();                       // expected error: only valid in compute stage
+    AllMemoryBarrierWithGroupSync();          // expected error: only valid in compute stage
     asdouble(inF0, inF1);                     // expected error: only integer inputs
     CheckAccessFullyMapped(3.0);              // expected error: only valid on integers
     CheckAccessFullyMapped(3);                // expected error: only valid in pixel & compute stages
@@ -24,8 +24,8 @@
     countbits(inF0);                          // expected error: only integer inputs
     cross(inF0, inF1);                        // expected error: only on float3 inputs
     D3DCOLORtoUBYTE4(inF0);                   // expected error: only on float4 inputs
-    // DeviceMemoryBarrier();                    // TODO: expected error: only valid in pixel & compute stages
-    // DeviceMemoryBarrierWithGroupSync();       // TODO: expected error: only valid in compute stage
+    DeviceMemoryBarrier();                    // expected error: only valid in pixel & compute stages
+    DeviceMemoryBarrierWithGroupSync();       // expected error: only valid in compute stage
     ddx(inF0);                                // expected error: only valid in pixel stage
     ddx_coarse(inF0);                         // expected error: only valid in pixel stage
     ddx_fine(inF0);                           // expected error: only valid in pixel stage
@@ -55,8 +55,8 @@
     InterlockedOr(gs_ua, gs_ub, out_u1);      // expected error: only valid in pixel stage
     InterlockedXor(gs_ua, gs_ub);             // expected error: only valid in pixel stage
     InterlockedXor(gs_ua, gs_ub, out_u1);     // expected error: only valid in pixel stage
-    // GroupMemoryBarrier();               // TODO: expected error: only valid in compute stage
-    // GroupMemoryBarrierWithGroupSync();  // TODO: expected error: only valid in compute stage
+    GroupMemoryBarrier();                     // expected error: only valid in compute stage
+    GroupMemoryBarrierWithGroupSync();        // expected error: only valid in compute stage
     length(inF0);                             // expect error: invalid on scalars
     msad4(inF0, float2(0), float4(0));        // expected error: only integer inputs
     normalize(inF0);                          // expect error: invalid on scalars
diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h
index 95363d5..1dee257 100644
--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -515,7 +515,14 @@
     EOpInterlockedMin,      // ...
     EOpInterlockedOr,       // ...
     EOpInterlockedXor,      // ...
-
+    EOpAllMemoryBarrierWithGroupSync,    // memory barriers without non-hlsl AST equivalents
+    EOpGroupMemoryBarrierWithGroupSync,  // ...
+    EOpWorkgroupMemoryBarrier,           // ...
+    EOpWorkgroupMemoryBarrierWithGroupSync, // ...
+    EOpEvaluateAttributeSnapped,         // InterpolateAtOffset with int position on 16x16 grid
+    EOpF32tof16,                         // HLSL conversion: half of a PackHalf2x16
+    EOpF16tof32,                         // HLSL conversion: half of an UnpackHalf2x16
+    EOpLit,                              // HLSL lighting coefficient vector
 };
 
 class TIntermTraverser;
diff --git a/glslang/MachineIndependent/intermOut.cpp b/glslang/MachineIndependent/intermOut.cpp
index a5a6f2c..03519bc 100644
--- a/glslang/MachineIndependent/intermOut.cpp
+++ b/glslang/MachineIndependent/intermOut.cpp
@@ -543,6 +543,11 @@
     case EOpSinCos:                     out.debug << "sincos";                break;
     case EOpGenMul:                     out.debug << "mul";                   break;
 
+    case EOpAllMemoryBarrierWithGroupSync:    out.debug << "AllMemoryBarrierWithGroupSync";    break;
+    case EOpGroupMemoryBarrierWithGroupSync: out.debug << "GroupMemoryBarrierWithGroupSync"; break;
+    case EOpWorkgroupMemoryBarrier:           out.debug << "WorkgroupMemoryBarrier";           break;
+    case EOpWorkgroupMemoryBarrierWithGroupSync: out.debug << "WorkgroupMemoryBarrierWithGroupSync"; break;
+
     default: out.debug.message(EPrefixError, "Bad aggregation op");
     }
 
diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp
index 22f48d7..51be6f7 100644
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -81,8 +81,12 @@
         {"hlsl.float4.frag", "PixelShaderFunction"},
         {"hlsl.forLoop.frag", "PixelShaderFunction"},
         {"hlsl.if.frag", "PixelShaderFunction"},
+        {"hlsl.intrinsics.barriers.comp", "ComputeShaderFunction"},
         {"hlsl.intrinsics.comp", "ComputeShaderFunction"},
+        {"hlsl.intrinsics.evalfns.frag", "main"},
+        {"hlsl.intrinsics.f1632.frag", "PixelShaderFunction"},
         {"hlsl.intrinsics.frag", "PixelShaderFunction"},
+        {"hlsl.intrinsics.lit.frag", "PixelShaderFunction"},
         {"hlsl.intrinsics.negative.comp", "ComputeShaderFunction"},
         {"hlsl.intrinsics.negative.frag", "PixelShaderFunction"},
         {"hlsl.intrinsics.negative.vert", "VertexShaderFunction"},
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp
index 4d46314..228ac4f 100755
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -967,6 +967,7 @@
             dst->getSequence().push_back(handleBinaryMath(loc, "mul", EOpMul, src0y, src1y));
             dst->getSequence().push_back(src0z);
             dst->getSequence().push_back(src1w);
+            dst->setType(TType(EbtFloat, EvqTemporary, 4));
             dst->setLoc(loc);
             node = dst;
 
@@ -1028,6 +1029,90 @@
             break;
         }
 
+    case EOpEvaluateAttributeSnapped:
+        {
+            // Rewrite EvaluateAttributeSnapped(value, offset) as an EOpInterpolateAtOffset aggregate.
+            // SPIR-V InterpolateAtOffset uses a float vec2 offset in pixels, while HLSL uses an
+            // int2 offset on a 16x16 grid in [-8..7] on x & y, so each component is converted:
+            //   iU = (iU<<28)>>28       keeps the low 4 bits (presumably sign-extended: offset is int)
+            //   fU = ((float)iU)/16     grid units to pixels; emitted below as a multiply by 1/16
+            // Targets might handle this natively, in which case they can disable decompositions.
+
+            TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // value
+            TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // offset
+
+            TIntermTyped* i28 = intermediate.addConstantUnion(28, loc, true);  // shift count, shared by both shifts
+            TIntermTyped* iU = handleBinaryMath(loc, ">>", EOpRightShift,
+                                                handleBinaryMath(loc, "<<", EOpLeftShift, arg1, i28),
+                                                i28);
+
+            TIntermTyped* recip16 = intermediate.addConstantUnion((1.0/16.0), EbtFloat, loc, true);
+            TIntermTyped* floatOffset = handleBinaryMath(loc, "mul", EOpMul,
+                                                         intermediate.addConversion(EOpConstructFloat,
+                                                                                    TType(EbtFloat, EvqTemporary, 2), iU),
+                                                         recip16);
+            
+            TIntermAggregate* interp = new TIntermAggregate(EOpInterpolateAtOffset);
+            interp->getSequence().push_back(arg0);         // operand 0: the value to interpolate
+            interp->getSequence().push_back(floatOffset);  // operand 1: the converted float offset
+            interp->setLoc(loc);
+            interp->setType(arg0->getType());              // result type is copied from the value argument...
+            interp->getWritableType().getQualifier().makeTemporary();  // ...with its qualifier made temporary
+
+            node = interp;  // the new aggregate replaces the incoming node
+
+            break;
+        }
+
+    case EOpLit:  // lit(n_dot_l, n_dot_h, m) -> float4(ambient, diffuse, specular, 1)
+        {
+            TIntermTyped* n_dot_l = argAggregate->getSequence()[0]->getAsTyped();
+            TIntermTyped* n_dot_h = argAggregate->getSequence()[1]->getAsTyped();
+            TIntermTyped* m = argAggregate->getSequence()[2]->getAsTyped();
+
+            TIntermAggregate* dst = new TIntermAggregate(EOpConstructVec4);  // components are pushed in x,y,z,w order
+
+            // Ambient: constant 1.0
+            dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
+
+            // Diffuse: max(n_dot_l, 0)
+            TIntermTyped* zero = intermediate.addConstantUnion(0.0, EbtFloat, loc, true);
+            TIntermAggregate* diffuse = new TIntermAggregate(EOpMax);
+            diffuse->getSequence().push_back(n_dot_l);
+            diffuse->getSequence().push_back(zero);
+            diffuse->setLoc(loc);
+            diffuse->setType(TType(EbtFloat));
+            dst->getSequence().push_back(diffuse);
+
+            // Specular: selection on (min(n_dot_l, n_dot_h) < 0) between 0 and n_dot_h * m
+            TIntermAggregate* min_ndot = new TIntermAggregate(EOpMin);
+            min_ndot->getSequence().push_back(n_dot_l);
+            min_ndot->getSequence().push_back(n_dot_h);
+            min_ndot->setLoc(loc);
+            min_ndot->setType(TType(EbtFloat));
+
+            TIntermTyped* compare = handleBinaryMath(loc, "<", EOpLessThan, min_ndot, zero);
+            TIntermTyped* n_dot_h_m = handleBinaryMath(loc, "mul", EOpMul, n_dot_h, m);  // n_dot_h * m
+
+            dst->getSequence().push_back(intermediate.addSelection(compare, zero, n_dot_h_m, loc));  // presumably zero when compare is true
+            
+            // One: constant 1.0 as the last component
+            dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
+
+            dst->setLoc(loc);
+            dst->setType(TType(EbtFloat, EvqTemporary, 4));  // 4-component float temporary
+            node = dst;  // the constructed vector replaces the incoming node
+            break;
+        }
+
+    // Temporary until decomposition is available: report each intrinsic under its own name.
+    case EOpF16tof32:
+        error(loc, "unimplemented intrinsic: handle natively", "f16tof32", "");
+        break;
+    case EOpF32tof16:
+        error(loc, "unimplemented intrinsic: handle natively", "f32tof16", "");
+        break;
+
     default:
         break; // most pass through unchanged
     }
diff --git a/hlsl/hlslParseables.cpp b/hlsl/hlslParseables.cpp
index 1137beb..cf8574c 100755
--- a/hlsl/hlslParseables.cpp
+++ b/hlsl/hlslParseables.cpp
@@ -283,7 +283,7 @@
         // { "errorf",                           "-",     "-",       "",         "",     EShLangAll }, TODO: varargs
         { "EvaluateAttributeAtCentroid",      nullptr, nullptr,   "SVM",        "F",      EShLangFragmentMask },
         { "EvaluateAttributeAtSample",        nullptr, nullptr,   "SVM,S",      "F,U",    EShLangFragmentMask },
-        { "EvaluateAttributeSnapped",         nullptr, nullptr,   "SVM,V2",     "F,F",    EShLangFragmentMask },
+        { "EvaluateAttributeSnapped",         nullptr, nullptr,   "SVM,V2",     "F,I",    EShLangFragmentMask },
         { "exp",                              nullptr, nullptr,   "SVM",        "F",      EShLangAll },
         { "exp2",                             nullptr, nullptr,   "SVM",        "F",      EShLangAll },
         { "f16tof32",                         nullptr, "F",       "SV",         "U",      EShLangAll },
@@ -519,8 +519,8 @@
     symbolTable.relateToOperator("abs",                         EOpAbs);
     symbolTable.relateToOperator("acos",                        EOpAcos);
     symbolTable.relateToOperator("all",                         EOpAll);
-    // symbolTable.relateToOperator("AllMemoryBarrier");
-    // symbolTable.relateToOperator("AllMemoryBarrierWithGroupSync");
+    symbolTable.relateToOperator("AllMemoryBarrier",            EOpMemoryBarrier);
+    symbolTable.relateToOperator("AllMemoryBarrierWithGroupSync", EOpAllMemoryBarrierWithGroupSync);
     symbolTable.relateToOperator("any",                         EOpAny);
     symbolTable.relateToOperator("asdouble",                    EOpUint64BitsToDouble);
     symbolTable.relateToOperator("asfloat",                     EOpIntBitsToFloat);
@@ -546,19 +546,19 @@
     symbolTable.relateToOperator("ddy_fine",                    EOpDPdyFine);
     symbolTable.relateToOperator("degrees",                     EOpDegrees);
     symbolTable.relateToOperator("determinant",                 EOpDeterminant);
-    // symbolTable.relateToOperator("DeviceMemoryBarrier");
-    // symbolTable.relateToOperator("DeviceMemoryBarrierWithGroupSync");
+    symbolTable.relateToOperator("DeviceMemoryBarrier",         EOpGroupMemoryBarrier); // == ScopeDevice+CrossWorkGroup
+    symbolTable.relateToOperator("DeviceMemoryBarrierWithGroupSync", EOpGroupMemoryBarrierWithGroupSync); // ...
     symbolTable.relateToOperator("distance",                    EOpDistance);
     symbolTable.relateToOperator("dot",                         EOpDot);
     symbolTable.relateToOperator("dst",                         EOpDst);
-    // symbolTable.relateToOperator("errorf");
+    // symbolTable.relateToOperator("errorf",                      EOpErrorf);
     symbolTable.relateToOperator("EvaluateAttributeAtCentroid", EOpInterpolateAtCentroid);
     symbolTable.relateToOperator("EvaluateAttributeAtSample",   EOpInterpolateAtSample);
-    // symbolTable.relateToOperator("EvaluateAttributeSnapped");  // TODO: hsnflr positions.  new op?
+    symbolTable.relateToOperator("EvaluateAttributeSnapped",    EOpEvaluateAttributeSnapped);
     symbolTable.relateToOperator("exp",                         EOpExp);
     symbolTable.relateToOperator("exp2",                        EOpExp2);
-    // symbolTable.relateToOperator("f16tof32");
-    // symbolTable.relateToOperator("f32tof16");
+    symbolTable.relateToOperator("f16tof32",                    EOpF16tof32);
+    symbolTable.relateToOperator("f32tof16",                    EOpF32tof16);
     symbolTable.relateToOperator("faceforward",                 EOpFaceForward);
     symbolTable.relateToOperator("firstbithigh",                EOpFindMSB);
     symbolTable.relateToOperator("firstbitlow",                 EOpFindLSB);
@@ -570,8 +570,8 @@
     symbolTable.relateToOperator("fwidth",                      EOpFwidth);
     // symbolTable.relateToOperator("GetRenderTargetSampleCount");
     // symbolTable.relateToOperator("GetRenderTargetSamplePosition");
-    // symbolTable.relateToOperator("GroupMemoryBarrier");
-    // symbolTable.relateToOperator("GroupMemoryBarrierWithGroupSync");
+    symbolTable.relateToOperator("GroupMemoryBarrier",          EOpWorkgroupMemoryBarrier);
+    symbolTable.relateToOperator("GroupMemoryBarrierWithGroupSync", EOpWorkgroupMemoryBarrierWithGroupSync);
     symbolTable.relateToOperator("InterlockedAdd",              EOpInterlockedAdd);
     symbolTable.relateToOperator("InterlockedAnd",              EOpInterlockedAnd);
     symbolTable.relateToOperator("InterlockedCompareExchange",  EOpInterlockedCompareExchange);
@@ -586,7 +586,7 @@
     symbolTable.relateToOperator("isnan",                       EOpIsNan);
     symbolTable.relateToOperator("ldexp",                       EOpLdexp);
     symbolTable.relateToOperator("length",                      EOpLength);
-    // symbolTable.relateToOperator("lit");
+    symbolTable.relateToOperator("lit",                         EOpLit);
     symbolTable.relateToOperator("log",                         EOpLog);
     symbolTable.relateToOperator("log10",                       EOpLog10);
     symbolTable.relateToOperator("log2",                        EOpLog2);
@@ -599,7 +599,7 @@
     // symbolTable.relateToOperator("noise",                    EOpNoise); // TODO: check return type
     symbolTable.relateToOperator("normalize",                   EOpNormalize);
     symbolTable.relateToOperator("pow",                         EOpPow);
-    // symbolTable.relateToOperator("printf");
+    // symbolTable.relateToOperator("printf",                     EOpPrintf);
     // symbolTable.relateToOperator("Process2DQuadTessFactorsAvg");
     // symbolTable.relateToOperator("Process2DQuadTessFactorsMax");
     // symbolTable.relateToOperator("Process2DQuadTessFactorsMin");