Add --hlsl-dx-position-w option

When enabled, the w component of SV_Position read by an HLSL fragment
shader is replaced with its reciprocal (1/w), so that HLSL shaders written
for DirectX see the value they expect when compiled for Vulkan.

Fixes #2244
diff --git a/StandAlone/StandAlone.cpp b/StandAlone/StandAlone.cpp
index 23e510c..4deaf4d 100644
--- a/StandAlone/StandAlone.cpp
+++ b/StandAlone/StandAlone.cpp
@@ -177,6 +177,7 @@
 const char* variableName = nullptr;
 bool HlslEnable16BitTypes = false;
 bool HlslDX9compatible = false;
+bool HlslDxPositionW = false;
 bool DumpBuiltinSymbols = false;
 std::vector<std::string> IncludeDirectoryList;
 
@@ -662,6 +663,8 @@
                         HlslEnable16BitTypes = true;
                     } else if (lowerword == "hlsl-dx9-compatible") {
                         HlslDX9compatible = true;
+                    } else if (lowerword == "hlsl-dx-position-w") {
+                        HlslDxPositionW = true;
                     } else if (lowerword == "auto-sampled-textures") { 
                         autoSampledTextures = true;
                     } else if (lowerword == "invert-y" ||  // synonyms
@@ -1284,6 +1287,9 @@
         if (Options & EOptionInvertY)
             shader->setInvertY(true);
 
+        if (HlslDxPositionW)
+            shader->setDxPositionW(true);
+
         // Set up the environment, some subsettings take precedence over earlier
         // ways of setting things.
         if (Options & EOptionSpv) {
@@ -1847,6 +1853,8 @@
            "  --hlsl-dx9-compatible             interprets sampler declarations as a\n"
            "                                    texture/sampler combo like DirectX9 would,\n"
            "                                    and recognizes DirectX9-specific semantics\n"
+           "  --hlsl-dx-position-w              W component of SV_Position in HLSL fragment\n"
+           "                                    shaders compatible with DirectX\n"
            "  --invert-y | --iy                 invert position.Y output in vertex shader\n"
            "  --keep-uncalled | --ku            don't eliminate uncalled functions\n"
            "  --nan-clamp                       favor non-NaN operand in min, max, and clamp\n"
diff --git a/Test/baseResults/hlsl.w-recip.frag.out b/Test/baseResults/hlsl.w-recip.frag.out
new file mode 100644
index 0000000..b72f361
--- /dev/null
+++ b/Test/baseResults/hlsl.w-recip.frag.out
@@ -0,0 +1,268 @@
+hlsl.w-recip.frag
+Shader version: 500
+gl_FragCoord origin is upper left
+0:? Sequence
+0:5  Function Definition: @main(vf4; ( temp 4-component vector of float)
+0:5    Function Parameters: 
+0:5      'vpos' ( in 4-component vector of float)
+0:?     Sequence
+0:6      Sequence
+0:6        move second child to first child ( temp 4-component vector of float)
+0:6          'vpos_t' ( temp 4-component vector of float)
+0:6          Construct vec4 ( temp 4-component vector of float)
+0:6            vector swizzle ( temp 3-component vector of float)
+0:6              'vpos' ( in 4-component vector of float)
+0:6              Sequence
+0:6                Constant:
+0:6                  0 (const int)
+0:6                Constant:
+0:6                  1 (const int)
+0:6                Constant:
+0:6                  2 (const int)
+0:6            divide ( temp float)
+0:6              Constant:
+0:6                1.000000
+0:6              direct index ( temp float)
+0:6                'vpos' ( in 4-component vector of float)
+0:6                Constant:
+0:6                  3 (const int)
+0:7      Test condition and select ( temp void)
+0:7        Condition
+0:7        Compare Less Than ( temp bool)
+0:7          direct index ( temp float)
+0:7            'vpos_t' ( temp 4-component vector of float)
+0:7            Constant:
+0:7              0 (const int)
+0:7          Constant:
+0:7            400.000000
+0:7        true case
+0:8        Branch: Return with expression
+0:8          AmbientColor: direct index for structure ( uniform 4-component vector of float)
+0:8            'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:8            Constant:
+0:8              0 (const uint)
+0:7        false case
+0:10        Branch: Return with expression
+0:10          AmbientColor2: direct index for structure ( uniform 4-component vector of float)
+0:10            'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:10            Constant:
+0:10              1 (const uint)
+0:5  Function Definition: main( ( temp void)
+0:5    Function Parameters: 
+0:?     Sequence
+0:5      move second child to first child ( temp 4-component vector of float)
+0:?         'vpos' ( temp 4-component vector of float)
+0:5        Construct vec4 ( temp 4-component vector of float)
+0:5          vector swizzle ( temp 3-component vector of float)
+0:?             'vpos' ( in 4-component vector of float FragCoord)
+0:5            Sequence
+0:5              Constant:
+0:5                0 (const int)
+0:5              Constant:
+0:5                1 (const int)
+0:5              Constant:
+0:5                2 (const int)
+0:5          divide ( temp float)
+0:5            Constant:
+0:5              1.000000
+0:5            direct index ( temp float)
+0:?               'vpos' ( in 4-component vector of float FragCoord)
+0:5              Constant:
+0:5                3 (const int)
+0:5      move second child to first child ( temp 4-component vector of float)
+0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:5        Function Call: @main(vf4; ( temp 4-component vector of float)
+0:?           'vpos' ( temp 4-component vector of float)
+0:?   Linker Objects
+0:?     'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:?     'vpos' ( in 4-component vector of float FragCoord)
+
+
+Linked fragment stage:
+
+
+Shader version: 500
+gl_FragCoord origin is upper left
+0:? Sequence
+0:5  Function Definition: @main(vf4; ( temp 4-component vector of float)
+0:5    Function Parameters: 
+0:5      'vpos' ( in 4-component vector of float)
+0:?     Sequence
+0:6      Sequence
+0:6        move second child to first child ( temp 4-component vector of float)
+0:6          'vpos_t' ( temp 4-component vector of float)
+0:6          Construct vec4 ( temp 4-component vector of float)
+0:6            vector swizzle ( temp 3-component vector of float)
+0:6              'vpos' ( in 4-component vector of float)
+0:6              Sequence
+0:6                Constant:
+0:6                  0 (const int)
+0:6                Constant:
+0:6                  1 (const int)
+0:6                Constant:
+0:6                  2 (const int)
+0:6            divide ( temp float)
+0:6              Constant:
+0:6                1.000000
+0:6              direct index ( temp float)
+0:6                'vpos' ( in 4-component vector of float)
+0:6                Constant:
+0:6                  3 (const int)
+0:7      Test condition and select ( temp void)
+0:7        Condition
+0:7        Compare Less Than ( temp bool)
+0:7          direct index ( temp float)
+0:7            'vpos_t' ( temp 4-component vector of float)
+0:7            Constant:
+0:7              0 (const int)
+0:7          Constant:
+0:7            400.000000
+0:7        true case
+0:8        Branch: Return with expression
+0:8          AmbientColor: direct index for structure ( uniform 4-component vector of float)
+0:8            'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:8            Constant:
+0:8              0 (const uint)
+0:7        false case
+0:10        Branch: Return with expression
+0:10          AmbientColor2: direct index for structure ( uniform 4-component vector of float)
+0:10            'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:10            Constant:
+0:10              1 (const uint)
+0:5  Function Definition: main( ( temp void)
+0:5    Function Parameters: 
+0:?     Sequence
+0:5      move second child to first child ( temp 4-component vector of float)
+0:?         'vpos' ( temp 4-component vector of float)
+0:5        Construct vec4 ( temp 4-component vector of float)
+0:5          vector swizzle ( temp 3-component vector of float)
+0:?             'vpos' ( in 4-component vector of float FragCoord)
+0:5            Sequence
+0:5              Constant:
+0:5                0 (const int)
+0:5              Constant:
+0:5                1 (const int)
+0:5              Constant:
+0:5                2 (const int)
+0:5          divide ( temp float)
+0:5            Constant:
+0:5              1.000000
+0:5            direct index ( temp float)
+0:?               'vpos' ( in 4-component vector of float FragCoord)
+0:5              Constant:
+0:5                3 (const int)
+0:5      move second child to first child ( temp 4-component vector of float)
+0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:5        Function Call: @main(vf4; ( temp 4-component vector of float)
+0:?           'vpos' ( temp 4-component vector of float)
+0:?   Linker Objects
+0:?     'anon@0' (layout( row_major std140) uniform block{ uniform 4-component vector of float AmbientColor,  uniform 4-component vector of float AmbientColor2})
+0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:?     'vpos' ( in 4-component vector of float FragCoord)
+
+// Module Version 10000
+// Generated by (magic number): 8000a
+// Id's are bound by 69
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 53 65
+                              ExecutionMode 4 OriginUpperLeft
+                              Source HLSL 500
+                              Name 4  "main"
+                              Name 11  "@main(vf4;"
+                              Name 10  "vpos"
+                              Name 13  "vpos_t"
+                              Name 36  "$Global"
+                              MemberName 36($Global) 0  "AmbientColor"
+                              MemberName 36($Global) 1  "AmbientColor2"
+                              Name 38  ""
+                              Name 51  "vpos"
+                              Name 53  "vpos"
+                              Name 65  "@entryPointOutput"
+                              Name 66  "param"
+                              MemberDecorate 36($Global) 0 Offset 0
+                              MemberDecorate 36($Global) 1 Offset 16
+                              Decorate 36($Global) Block
+                              Decorate 38 DescriptorSet 0
+                              Decorate 38 Binding 0
+                              Decorate 53(vpos) BuiltIn FragCoord
+                              Decorate 65(@entryPointOutput) Location 0
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Function 7(fvec4)
+               9:             TypeFunction 7(fvec4) 8(ptr)
+              14:             TypeVector 6(float) 3
+              17:    6(float) Constant 1065353216
+              18:             TypeInt 32 0
+              19:     18(int) Constant 3
+              20:             TypePointer Function 6(float)
+              28:     18(int) Constant 0
+              31:    6(float) Constant 1137180672
+              32:             TypeBool
+     36($Global):             TypeStruct 7(fvec4) 7(fvec4)
+              37:             TypePointer Uniform 36($Global)
+              38:     37(ptr) Variable Uniform
+              39:             TypeInt 32 1
+              40:     39(int) Constant 0
+              41:             TypePointer Uniform 7(fvec4)
+              46:     39(int) Constant 1
+              52:             TypePointer Input 7(fvec4)
+        53(vpos):     52(ptr) Variable Input
+              56:             TypePointer Input 6(float)
+              64:             TypePointer Output 7(fvec4)
+65(@entryPointOutput):     64(ptr) Variable Output
+         4(main):           2 Function None 3
+               5:             Label
+        51(vpos):      8(ptr) Variable Function
+       66(param):      8(ptr) Variable Function
+              54:    7(fvec4) Load 53(vpos)
+              55:   14(fvec3) VectorShuffle 54 54 0 1 2
+              57:     56(ptr) AccessChain 53(vpos) 19
+              58:    6(float) Load 57
+              59:    6(float) FDiv 17 58
+              60:    6(float) CompositeExtract 55 0
+              61:    6(float) CompositeExtract 55 1
+              62:    6(float) CompositeExtract 55 2
+              63:    7(fvec4) CompositeConstruct 60 61 62 59
+                              Store 51(vpos) 63
+              67:    7(fvec4) Load 51(vpos)
+                              Store 66(param) 67
+              68:    7(fvec4) FunctionCall 11(@main(vf4;) 66(param)
+                              Store 65(@entryPointOutput) 68
+                              Return
+                              FunctionEnd
+  11(@main(vf4;):    7(fvec4) Function None 9
+        10(vpos):      8(ptr) FunctionParameter
+              12:             Label
+      13(vpos_t):      8(ptr) Variable Function
+              15:    7(fvec4) Load 10(vpos)
+              16:   14(fvec3) VectorShuffle 15 15 0 1 2
+              21:     20(ptr) AccessChain 10(vpos) 19
+              22:    6(float) Load 21
+              23:    6(float) FDiv 17 22
+              24:    6(float) CompositeExtract 16 0
+              25:    6(float) CompositeExtract 16 1
+              26:    6(float) CompositeExtract 16 2
+              27:    7(fvec4) CompositeConstruct 24 25 26 23
+                              Store 13(vpos_t) 27
+              29:     20(ptr) AccessChain 13(vpos_t) 28
+              30:    6(float) Load 29
+              33:    32(bool) FOrdLessThan 30 31
+                              SelectionMerge 35 None
+                              BranchConditional 33 34 45
+              34:               Label
+              42:     41(ptr)   AccessChain 38 40
+              43:    7(fvec4)   Load 42
+                                ReturnValue 43
+              45:               Label
+              47:     41(ptr)   AccessChain 38 46
+              48:    7(fvec4)   Load 47
+                                ReturnValue 48
+              35:             Label
+                              Unreachable
+                              FunctionEnd
diff --git a/Test/hlsl.w-recip.frag b/Test/hlsl.w-recip.frag
new file mode 100644
index 0000000..4812d26
--- /dev/null
+++ b/Test/hlsl.w-recip.frag
@@ -0,0 +1,12 @@
+float4 AmbientColor = float4(1, 0.5, 0, 1);
+float4 AmbientColor2 = float4(0.5, 1, 0, 0);
+
+float4 main(float4 vpos : SV_POSITION) : SV_TARGET
+{
+    float4 vpos_t = float4(vpos.xyz, 1 / vpos.w);
+    if (vpos_t.x < 400)
+        return AmbientColor;
+    else
+        return AmbientColor2;
+}
+
diff --git a/Test/runtests b/Test/runtests
index a7bdda7..f27b6d8 100755
--- a/Test/runtests
+++ b/Test/runtests
@@ -255,6 +255,13 @@
 diff -b $BASEDIR/hlsl.y-negate-3.vert.out $TARGETDIR/hlsl.y-negate-3.vert.out || HASERROR=1
 
 #
+# Testing position W reciprocal
+#
+echo "Testing position W reciprocal"
+run -H -e main -V -D -Od -H -i --hlsl-dx-position-w hlsl.w-recip.frag > $TARGETDIR/hlsl.w-recip.frag.out
+diff -b $BASEDIR/hlsl.w-recip.frag.out $TARGETDIR/hlsl.w-recip.frag.out || HASERROR=1
+
+#
 # Testing hlsl_functionality1
 #
 echo "Testing hlsl_functionality1"
diff --git a/glslang/HLSL/hlslParseHelper.cpp b/glslang/HLSL/hlslParseHelper.cpp
index 0936bd3..9122973 100644
--- a/glslang/HLSL/hlslParseHelper.cpp
+++ b/glslang/HLSL/hlslParseHelper.cpp
@@ -2167,8 +2167,21 @@
         TIntermSymbol* arg = intermediate.addSymbol(*argVars.back());
         handleFunctionArgument(&callee, callingArgs, arg);
         if (param.type->getQualifier().isParamInput()) {
-            intermediate.growAggregate(synthBody, handleAssign(loc, EOpAssign, arg,
-                                                               intermediate.addSymbol(**inputIt)));
+            TIntermTyped* input = intermediate.addSymbol(**inputIt);
+            if (input->getType().getQualifier().builtIn == EbvFragCoord && intermediate.getDxPositionW()) {
+                // Replace FragCoord W with reciprocal
+                auto pos_xyz = handleDotDereference(loc, input, "xyz");
+                auto pos_w   = handleDotDereference(loc, input, "w");
+                auto one     = intermediate.addConstantUnion(1.0, EbtFloat, loc);
+                auto recip_w = intermediate.addBinaryMath(EOpDiv, one, pos_w, loc);
+                TIntermAggregate* dst = new TIntermAggregate(EOpConstructVec4);
+                dst->getSequence().push_back(pos_xyz);
+                dst->getSequence().push_back(recip_w);
+                dst->setType(TType(EbtFloat, EvqTemporary, 4));
+                dst->setLoc(loc);
+                input = dst;
+            }
+            intermediate.growAggregate(synthBody, handleAssign(loc, EOpAssign, arg, input));
             inputIt++;
         }
         if (param.type->getQualifier().storage == EvqUniform) {
diff --git a/glslang/MachineIndependent/ShaderLang.cpp b/glslang/MachineIndependent/ShaderLang.cpp
index a2dd71c..bcf2c33 100644
--- a/glslang/MachineIndependent/ShaderLang.cpp
+++ b/glslang/MachineIndependent/ShaderLang.cpp
@@ -1829,6 +1829,7 @@
 }
 
 void TShader::setInvertY(bool invert)                   { intermediate->setInvertY(invert); }
+void TShader::setDxPositionW(bool dxPosW)               { intermediate->setDxPositionW(dxPosW); } // forwards the flag to the intermediate
 void TShader::setNanMinMaxClamp(bool useNonNan)         { intermediate->setNanMinMaxClamp(useNonNan); }
 
 #ifndef GLSLANG_WEB
diff --git a/glslang/MachineIndependent/linkValidate.cpp b/glslang/MachineIndependent/linkValidate.cpp
index b1adfc9..d2eb902 100644
--- a/glslang/MachineIndependent/linkValidate.cpp
+++ b/glslang/MachineIndependent/linkValidate.cpp
@@ -312,6 +312,7 @@
     MERGE_TRUE(autoMapBindings);
     MERGE_TRUE(autoMapLocations);
     MERGE_TRUE(invertY);
+    MERGE_TRUE(dxPositionW);
     MERGE_TRUE(flattenUniformArrays);
     MERGE_TRUE(useUnknownFormat);
     MERGE_TRUE(hlslOffsets);
diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h
index 6aa9399..940abf7 100644
--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -290,6 +290,7 @@
         resources(TBuiltInResource{}),
         numEntryPoints(0), numErrors(0), numPushConstants(0), recursive(false),
         invertY(false),
+        dxPositionW(false),
         useStorageBuffer(false),
         invariantAll(false),
         nanMinMaxClamp(false),
@@ -460,6 +461,14 @@
     }
     bool getInvertY() const { return invertY; }
 
+    void setDxPositionW(bool dxPosW) // stores the flag; when true, also records the "dx-position-w" process
+    {
+        dxPositionW = dxPosW;
+        if (dxPositionW)
+            processes.addProcess("dx-position-w");
+    }
+    bool getDxPositionW() const { return dxPositionW; } // returns the flag last stored by setDxPositionW()
+
 #ifdef ENABLE_HLSL
     void setSource(EShSource s) { source = s; }
     EShSource getSource() const { return source; }
@@ -1070,6 +1079,7 @@
     int numPushConstants;
     bool recursive;
     bool invertY;
+    bool dxPositionW;
     bool useStorageBuffer;
     bool invariantAll;
     bool nanMinMaxClamp;            // true if desiring min/max/clamp to favor non-NaN over NaN
diff --git a/glslang/Public/ShaderLang.h b/glslang/Public/ShaderLang.h
index d2a4bf4..9d3e9be 100644
--- a/glslang/Public/ShaderLang.h
+++ b/glslang/Public/ShaderLang.h
@@ -485,6 +485,7 @@
     GLSLANG_EXPORT void addUniformLocationOverride(const char* name, int loc);
     GLSLANG_EXPORT void setUniformLocationBase(int base);
     GLSLANG_EXPORT void setInvertY(bool invert);
+    GLSLANG_EXPORT void setDxPositionW(bool dxPosW);
 #ifdef ENABLE_HLSL
     GLSLANG_EXPORT void setHlslIoMapping(bool hlslIoMap);
     GLSLANG_EXPORT void setFlattenUniformArrays(bool flatten);