HLSL: Emulate write-to-output on return-from-entry-point, for return value.

This fixes issues #487 and #480.
It also correctly handles output parameters from the entry point.
diff --git a/Test/baseResults/hlsl.entry-out.frag.out b/Test/baseResults/hlsl.entry-out.frag.out
new file mode 100755
index 0000000..96f5b78
--- /dev/null
+++ b/Test/baseResults/hlsl.entry-out.frag.out
@@ -0,0 +1,127 @@
+hlsl.entry-out.frag
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:13  Function Definition: PixelShaderFunction(vf4;vf4;struct-OutParam-vf2-vi21; (global 4-component vector of float)
+0:7    Function Parameters: 
+0:7      'input' (in 4-component vector of float)
+0:7      'out1' (out 4-component vector of float)
+0:7      'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:?     Sequence
+0:8      move second child to first child (temp 4-component vector of float)
+0:8        'out1' (out 4-component vector of float)
+0:8        'input' (in 4-component vector of float)
+0:9      move second child to first child (temp 2-component vector of float)
+0:9        v: direct index for structure (temp 2-component vector of float)
+0:9          'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:9          Constant:
+0:9            0 (const int)
+0:9        Constant:
+0:9          2.000000
+0:9          2.000000
+0:10      move second child to first child (temp 2-component vector of int)
+0:10        i: direct index for structure (temp 2-component vector of int)
+0:10          'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:10          Constant:
+0:10            1 (const int)
+0:10        Constant:
+0:10          3 (const int)
+0:10          3 (const int)
+0:11      Sequence
+0:11        move second child to first child (temp 4-component vector of float)
+0:?           '@entryPointOutput' (out 4-component vector of float)
+0:11          'out1' (out 4-component vector of float)
+0:11        Branch: Return
+0:?   Linker Objects
+
+
+Linked fragment stage:
+
+
+Shader version: 450
+gl_FragCoord origin is upper left
+0:? Sequence
+0:13  Function Definition: PixelShaderFunction(vf4;vf4;struct-OutParam-vf2-vi21; (global 4-component vector of float)
+0:7    Function Parameters: 
+0:7      'input' (in 4-component vector of float)
+0:7      'out1' (out 4-component vector of float)
+0:7      'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:?     Sequence
+0:8      move second child to first child (temp 4-component vector of float)
+0:8        'out1' (out 4-component vector of float)
+0:8        'input' (in 4-component vector of float)
+0:9      move second child to first child (temp 2-component vector of float)
+0:9        v: direct index for structure (temp 2-component vector of float)
+0:9          'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:9          Constant:
+0:9            0 (const int)
+0:9        Constant:
+0:9          2.000000
+0:9          2.000000
+0:10      move second child to first child (temp 2-component vector of int)
+0:10        i: direct index for structure (temp 2-component vector of int)
+0:10          'out2' (out structure{temp 2-component vector of float v, temp 2-component vector of int i})
+0:10          Constant:
+0:10            1 (const int)
+0:10        Constant:
+0:10          3 (const int)
+0:10          3 (const int)
+0:11      Sequence
+0:11        move second child to first child (temp 4-component vector of float)
+0:?           '@entryPointOutput' (out 4-component vector of float)
+0:11          'out1' (out 4-component vector of float)
+0:11        Branch: Return
+0:?   Linker Objects
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 32
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "PixelShaderFunction" 9 11 18 29
+                              ExecutionMode 4 OriginUpperLeft
+                              Name 4  "PixelShaderFunction"
+                              Name 9  "out1"
+                              Name 11  "input"
+                              Name 16  "OutParam"
+                              MemberName 16(OutParam) 0  "v"
+                              MemberName 16(OutParam) 1  "i"
+                              Name 18  "out2"
+                              Name 29  "@entryPointOutput"
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Output 7(fvec4)
+         9(out1):      8(ptr) Variable Output
+              10:             TypePointer Input 7(fvec4)
+       11(input):     10(ptr) Variable Input
+              13:             TypeVector 6(float) 2
+              14:             TypeInt 32 1
+              15:             TypeVector 14(int) 2
+    16(OutParam):             TypeStruct 13(fvec2) 15(ivec2)
+              17:             TypePointer Output 16(OutParam)
+        18(out2):     17(ptr) Variable Output
+              19:     14(int) Constant 0
+              20:    6(float) Constant 1073741824
+              21:   13(fvec2) ConstantComposite 20 20
+              22:             TypePointer Output 13(fvec2)
+              24:     14(int) Constant 1
+              25:     14(int) Constant 3
+              26:   15(ivec2) ConstantComposite 25 25
+              27:             TypePointer Output 15(ivec2)
+29(@entryPointOutput):      8(ptr) Variable Output
+4(PixelShaderFunction):           2 Function None 3
+               5:             Label
+              12:    7(fvec4) Load 11(input)
+                              Store 9(out1) 12
+              23:     22(ptr) AccessChain 18(out2) 19
+                              Store 23 21
+              28:     27(ptr) AccessChain 18(out2) 24
+                              Store 28 26
+              30:    7(fvec4) Load 9(out1)
+                              Store 29(@entryPointOutput) 30
+                              Return
+                              FunctionEnd