HLSL: for split output structs, propagate indirection to builtin.

Some stages (e.g., hull shaders) have arrayed builtin outputs (e.g., position).
When copying from the internal structure to the split form, any indirect
index applied to the output (e.g., by invocation ID) must be propagated to
the actual arrayed builtin output.  This was not happening: the builtin was
being written at constant index 0 instead.

Addresses #1181
diff --git a/Test/baseResults/hlsl.hull.4.tesc.out b/Test/baseResults/hlsl.hull.4.tesc.out
index 139d7d4..3349df8 100644
--- a/Test/baseResults/hlsl.hull.4.tesc.out
+++ b/Test/baseResults/hlsl.hull.4.tesc.out
@@ -159,10 +159,9 @@
 0:?         'cpid' ( in uint InvocationID)
 0:39      Sequence
 0:39        move second child to first child ( temp 4-component vector of float)
-0:39          direct index ( out 4-component vector of float Position)
+0:39          indirect index ( out 4-component vector of float Position)
 0:?             '@entryPointOutput.m_Position' ( out 3-element array of 4-component vector of float Position)
-0:39            Constant:
-0:39              0 (const int)
+0:?             'cpid' ( in uint InvocationID)
 0:39          m_Position: direct index for structure ( temp 4-component vector of float)
 0:39            Function Call: @main(struct-HS_Input-vf4-vf41[3];u1; ( temp structure{ temp 4-component vector of float m_Position})
 0:?               'I' ( temp 3-element array of structure{ temp 4-component vector of float m_Position,  temp 4-component vector of float m_Normal})
@@ -400,10 +399,9 @@
 0:?         'cpid' ( in uint InvocationID)
 0:39      Sequence
 0:39        move second child to first child ( temp 4-component vector of float)
-0:39          direct index ( out 4-component vector of float Position)
+0:39          indirect index ( out 4-component vector of float Position)
 0:?             '@entryPointOutput.m_Position' ( out 3-element array of 4-component vector of float Position)
-0:39            Constant:
-0:39              0 (const int)
+0:?             'cpid' ( in uint InvocationID)
 0:39          m_Position: direct index for structure ( temp 4-component vector of float)
 0:39            Function Call: @main(struct-HS_Input-vf4-vf41[3];u1; ( temp structure{ temp 4-component vector of float m_Position})
 0:?               'I' ( temp 3-element array of structure{ temp 4-component vector of float m_Position,  temp 4-component vector of float m_Normal})
@@ -479,12 +477,12 @@
 
 // Module Version 10000
 // Generated by (magic number): 80002
-// Id's are bound by 127
+// Id's are bound by 128
 
                               Capability Tessellation
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint TessellationControl 4  "main" 56 64 83 86 110 123
+                              EntryPoint TessellationControl 4  "main" 56 64 83 86 111 124
                               ExecutionMode 4 OutputVertices 3
                               ExecutionMode 4 Triangles
                               ExecutionMode 4 SpacingFractionalOdd
@@ -514,20 +512,20 @@
                               Name 81  "cpid"
                               Name 83  "cpid"
                               Name 86  "@entryPointOutput.m_Position"
-                              Name 87  "param"
-                              Name 89  "param"
-                              Name 103  "@patchConstantResult"
-                              Name 104  "param"
-                              Name 110  "@patchConstantOutput.fTessFactor"
-                              Name 123  "@patchConstantOutput.fInsideTessFactor"
+                              Name 88  "param"
+                              Name 90  "param"
+                              Name 104  "@patchConstantResult"
+                              Name 105  "param"
+                              Name 111  "@patchConstantOutput.fTessFactor"
+                              Name 124  "@patchConstantOutput.fInsideTessFactor"
                               Decorate 56(I.m_Position) BuiltIn Position
                               Decorate 64(I) Location 0
                               Decorate 83(cpid) BuiltIn InvocationId
                               Decorate 86(@entryPointOutput.m_Position) BuiltIn Position
-                              Decorate 110(@patchConstantOutput.fTessFactor) Patch
-                              Decorate 110(@patchConstantOutput.fTessFactor) BuiltIn TessLevelOuter
-                              Decorate 123(@patchConstantOutput.fInsideTessFactor) Patch
-                              Decorate 123(@patchConstantOutput.fInsideTessFactor) BuiltIn TessLevelInner
+                              Decorate 111(@patchConstantOutput.fTessFactor) Patch
+                              Decorate 111(@patchConstantOutput.fTessFactor) BuiltIn TessLevelOuter
+                              Decorate 124(@patchConstantOutput.fInsideTessFactor) Patch
+                              Decorate 124(@patchConstantOutput.fInsideTessFactor) BuiltIn TessLevelInner
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeFloat 32
@@ -568,27 +566,27 @@
         83(cpid):     82(ptr) Variable Input
               85:             TypePointer Output 54
 86(@entryPointOutput.m_Position):     85(ptr) Variable Output
-              93:             TypePointer Output 7(fvec4)
-              95:      9(int) Constant 2
-              96:      9(int) Constant 1
-              97:      9(int) Constant 0
-              99:             TypeBool
-             107:      9(int) Constant 4
-             108:             TypeArray 6(float) 107
-             109:             TypePointer Output 108
-110(@patchConstantOutput.fTessFactor):    109(ptr) Variable Output
-             113:             TypePointer Output 6(float)
-             121:             TypeArray 6(float) 95
-             122:             TypePointer Output 121
-123(@patchConstantOutput.fInsideTessFactor):    122(ptr) Variable Output
+              94:             TypePointer Output 7(fvec4)
+              96:      9(int) Constant 2
+              97:      9(int) Constant 1
+              98:      9(int) Constant 0
+             100:             TypeBool
+             108:      9(int) Constant 4
+             109:             TypeArray 6(float) 108
+             110:             TypePointer Output 109
+111(@patchConstantOutput.fTessFactor):    110(ptr) Variable Output
+             114:             TypePointer Output 6(float)
+             122:             TypeArray 6(float) 96
+             123:             TypePointer Output 122
+124(@patchConstantOutput.fInsideTessFactor):    123(ptr) Variable Output
          4(main):           2 Function None 3
                5:             Label
            53(I):     12(ptr) Variable Function
         81(cpid):     19(ptr) Variable Function
-       87(param):     12(ptr) Variable Function
-       89(param):     19(ptr) Variable Function
-103(@patchConstantResult):     26(ptr) Variable Function
-      104(param):     12(ptr) Variable Function
+       88(param):     12(ptr) Variable Function
+       90(param):     19(ptr) Variable Function
+104(@patchConstantResult):     26(ptr) Variable Function
+      105(param):     12(ptr) Variable Function
               58:     57(ptr) AccessChain 56(I.m_Position) 33
               59:    7(fvec4) Load 58
               60:     48(ptr) AccessChain 53(I) 33 33
@@ -615,42 +613,43 @@
                               Store 80 79
               84:      9(int) Load 83(cpid)
                               Store 81(cpid) 84
-              88:          11 Load 53(I)
-                              Store 87(param) 88
-              90:      9(int) Load 81(cpid)
-                              Store 89(param) 90
-              91:20(HS_Main_Output) FunctionCall 24(@main(struct-HS_Input-vf4-vf41[3];u1;) 87(param) 89(param)
-              92:    7(fvec4) CompositeExtract 91 0
-              94:     93(ptr) AccessChain 86(@entryPointOutput.m_Position) 33
-                              Store 94 92
-                              ControlBarrier 95 96 97
-              98:      9(int) Load 83(cpid)
-             100:    99(bool) IEqual 98 33
-                              SelectionMerge 102 None
-                              BranchConditional 100 101 102
-             101:               Label
-             105:          11   Load 53(I)
-                                Store 104(param) 105
-             106:14(HS_Output)   FunctionCall 17(HS_ConstFunc(struct-HS_Input-vf4-vf41[3];) 104(param)
-                                Store 103(@patchConstantResult) 106
-             111:     34(ptr)   AccessChain 103(@patchConstantResult) 33 33
-             112:    6(float)   Load 111
-             114:    113(ptr)   AccessChain 110(@patchConstantOutput.fTessFactor) 33
-                                Store 114 112
-             115:     34(ptr)   AccessChain 103(@patchConstantResult) 33 32
-             116:    6(float)   Load 115
-             117:    113(ptr)   AccessChain 110(@patchConstantOutput.fTessFactor) 32
-                                Store 117 116
-             118:     34(ptr)   AccessChain 103(@patchConstantResult) 33 74
-             119:    6(float)   Load 118
-             120:    113(ptr)   AccessChain 110(@patchConstantOutput.fTessFactor) 74
-                                Store 120 119
-             124:     34(ptr)   AccessChain 103(@patchConstantResult) 32
-             125:    6(float)   Load 124
-             126:    113(ptr)   AccessChain 123(@patchConstantOutput.fInsideTessFactor) 33
-                                Store 126 125
-                                Branch 102
-             102:             Label
+              87:      9(int) Load 83(cpid)
+              89:          11 Load 53(I)
+                              Store 88(param) 89
+              91:      9(int) Load 81(cpid)
+                              Store 90(param) 91
+              92:20(HS_Main_Output) FunctionCall 24(@main(struct-HS_Input-vf4-vf41[3];u1;) 88(param) 90(param)
+              93:    7(fvec4) CompositeExtract 92 0
+              95:     94(ptr) AccessChain 86(@entryPointOutput.m_Position) 87
+                              Store 95 93
+                              ControlBarrier 96 97 98
+              99:      9(int) Load 83(cpid)
+             101:   100(bool) IEqual 99 33
+                              SelectionMerge 103 None
+                              BranchConditional 101 102 103
+             102:               Label
+             106:          11   Load 53(I)
+                                Store 105(param) 106
+             107:14(HS_Output)   FunctionCall 17(HS_ConstFunc(struct-HS_Input-vf4-vf41[3];) 105(param)
+                                Store 104(@patchConstantResult) 107
+             112:     34(ptr)   AccessChain 104(@patchConstantResult) 33 33
+             113:    6(float)   Load 112
+             115:    114(ptr)   AccessChain 111(@patchConstantOutput.fTessFactor) 33
+                                Store 115 113
+             116:     34(ptr)   AccessChain 104(@patchConstantResult) 33 32
+             117:    6(float)   Load 116
+             118:    114(ptr)   AccessChain 111(@patchConstantOutput.fTessFactor) 32
+                                Store 118 117
+             119:     34(ptr)   AccessChain 104(@patchConstantResult) 33 74
+             120:    6(float)   Load 119
+             121:    114(ptr)   AccessChain 111(@patchConstantOutput.fTessFactor) 74
+                                Store 121 120
+             125:     34(ptr)   AccessChain 104(@patchConstantResult) 32
+             126:    6(float)   Load 125
+             127:    114(ptr)   AccessChain 124(@patchConstantOutput.fInsideTessFactor) 33
+                                Store 127 126
+                                Branch 103
+             103:             Label
                               Return
                               FunctionEnd
 17(HS_ConstFunc(struct-HS_Input-vf4-vf41[3];):14(HS_Output) Function None 15
diff --git a/Test/baseResults/hlsl.hull.5.tesc.out b/Test/baseResults/hlsl.hull.5.tesc.out
index 3a42b52..656427b 100644
--- a/Test/baseResults/hlsl.hull.5.tesc.out
+++ b/Test/baseResults/hlsl.hull.5.tesc.out
@@ -80,10 +80,9 @@
 0:?         'cpid' ( in uint InvocationID)
 0:39      Sequence
 0:39        move second child to first child ( temp 4-component vector of float)
-0:39          direct index ( out 4-component vector of float Position)
+0:39          indirect index ( out 4-component vector of float Position)
 0:?             '@entryPointOutput.m_Position' ( out 3-element array of 4-component vector of float Position)
-0:39            Constant:
-0:39              0 (const int)
+0:?             'cpid' ( in uint InvocationID)
 0:39          m_Position: direct index for structure ( temp 4-component vector of float)
 0:39            Function Call: @main(u1; ( temp structure{ temp 4-component vector of float m_Position})
 0:?               'cpid' ( temp uint)
@@ -174,10 +173,9 @@
 0:?         'cpid' ( in uint InvocationID)
 0:39      Sequence
 0:39        move second child to first child ( temp 4-component vector of float)
-0:39          direct index ( out 4-component vector of float Position)
+0:39          indirect index ( out 4-component vector of float Position)
 0:?             '@entryPointOutput.m_Position' ( out 3-element array of 4-component vector of float Position)
-0:39            Constant:
-0:39              0 (const int)
+0:?             'cpid' ( in uint InvocationID)
 0:39          m_Position: direct index for structure ( temp 4-component vector of float)
 0:39            Function Call: @main(u1; ( temp structure{ temp 4-component vector of float m_Position})
 0:?               'cpid' ( temp uint)
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp
index 5650d6c..43f984a 100755
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -2731,13 +2731,23 @@
             // copy from interstage IO built-in if needed
             subTree = intermediate.addSymbol(*builtInVar);
 
-            // Arrayness of builtIn symbols isn't handled by the normal recursion:
-            // it's been extracted and moved to the built-in.
-            if (subTree->getType().isArray() && !arrayElement.empty()) {
-                const TType splitDerefType(subTree->getType(), arrayElement.back());
-                subTree = intermediate.addIndex(EOpIndexDirect, subTree,
-                                                intermediate.addConstantUnion(arrayElement.back(), loc), loc);
-                subTree->setType(splitDerefType);
+            if (subTree->getType().isArray()) {
+                // Arrayness of builtIn symbols isn't handled by the normal recursion:
+                // it's been extracted and moved to the built-in.
+                if (!arrayElement.empty()) {
+                    const TType splitDerefType(subTree->getType(), arrayElement.back());
+                    subTree = intermediate.addIndex(EOpIndexDirect, subTree,
+                                                    intermediate.addConstantUnion(arrayElement.back(), loc), loc);
+                    subTree->setType(splitDerefType);
+                } else if (splitNode->getAsOperator() != nullptr && (splitNode->getAsOperator()->getOp() == EOpIndexIndirect)) {
+                    // This might also be a stage with arrayed outputs, in which case there's an index
+                    // operation we should transfer to the output builtin.
+
+                    const TType splitDerefType(subTree->getType(), 0);
+                    subTree = intermediate.addIndex(splitNode->getAsOperator()->getOp(), subTree,
+                                                    splitNode->getAsBinaryNode()->getRight(), loc);
+                    subTree->setType(splitDerefType);
+                }
             }
         } else if (flattened && !shouldFlatten(derefType, isLeft ? leftStorage : rightStorage, false)) {
             if (isLeft)