; blob: e559d9956cd3b52a668bd557a02dfb9689db0952
; Target description: little-endian ("e"), 32-bit pointers ("p:32:32:32"),
; 64-bit ABI / 128-bit preferred alignment for 128-bit vectors ("v128:64:128").
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"


; Matrix types. Each one wraps a single flat array of floats
; (4, 9 and 16 elements respectively).
%struct.rs_matrix2x2 = type { [4 x float] }
%struct.rs_matrix3x3 = type { [9 x float] }
%struct.rs_matrix4x4 = type { [16 x float] }
8
; Broadcasts the scalar %in into all four lanes of a <4 x float>.
; Internal helper, always inlined into its callers.
define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
  ; Place the scalar in lane 0, then replicate lane 0 into every lane
  ; (an all-zero shuffle mask selects element 0 of the first operand four times).
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
16
17
; rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) -> float3
;
; Returns in.x * m[0..2] + in.y * m[3..5] + in.z * m[6..8], where m[i] is the
; i-th float of the matrix array. The arithmetic is done on 4-wide vectors and
; only lanes 0-2 of the result are returned.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  ; Broadcast each input component across a 4-wide vector.
  %x0 = extractelement <3 x float> %in, i32 0
  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
  %y0 = extractelement <3 x float> %in, i32 1
  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
  %z0 = extractelement <3 x float> %in, i32 2
  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone

  ; All loads carry an explicit "align 4": the struct contains only floats, so
  ; %m is only known to be 4-byte aligned. Without it each load would assume
  ; the vector type's ABI alignment, and the groups at elements 0 and 3
  ; (byte offsets 0 and 12) can never both be 8-byte aligned.
  ;
  ; The 4-wide loads at elements 0 and 3 pick up one extra float in lane 3;
  ; that lane is discarded by the final shuffle.
  %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %px2 = bitcast float* %px to <4 x float>*
  %xm = load <4 x float>* %px2, align 4
  %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %py2 = bitcast float* %py to <4 x float>*
  %ym = load <4 x float>* %py2, align 4
  ; The last group is loaded 3-wide: a 4-wide load starting at element 6 would
  ; extend past the end of the 9-element array.
  %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 6
  %pz2 = bitcast float* %pz to <3 x float>*
  %zm2 = load <3 x float>* %pz2, align 4
  ; Widen to 4 lanes; mask index 3 selects lane 0 of the undef operand.
  %zm = shufflevector <3 x float> %zm2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

  ; Accumulate x*xm + y*ym + z*zm.
  %a1 = fmul <4 x float> %x, %xm
  %a2 = fmul <4 x float> %y, %ym
  %a3 = fadd <4 x float> %a1, %a2
  %a4 = fmul <4 x float> %z, %zm
  %a5 = fadd <4 x float> %a4, %a3
  ; Drop lane 3 to produce the float3 result.
  %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %a6
}
45
; rsMatrixMultiply(rs_matrix3x3 *m, float3 in) -> float3
; Non-const-pointer overload; forwards unchanged to the const-pointer variant.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  %product = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind
  ret <3 x float> %product
}
50
; rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) -> float3
;
; Returns in.x * m[0..2] + in.y * m[3..5], where m[i] is the i-th float of the
; matrix array. Elements 6-8 of the matrix are not read.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  ; Broadcast each input component across a 4-wide vector.
  %x0 = extractelement <2 x float> %in, i32 0
  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
  %y0 = extractelement <2 x float> %in, i32 1
  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone

  ; Explicit "align 4": the struct contains only floats, so %m is only known to
  ; be 4-byte aligned. Without it the loads would assume the vector type's ABI
  ; alignment, and byte offsets 0 and 12 can never both be 8-byte aligned.
  ; Each 4-wide load picks up one extra float in lane 3, dropped below.
  %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %px2 = bitcast float* %px to <4 x float>*
  %xm = load <4 x float>* %px2, align 4
  %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %py2 = bitcast float* %py to <4 x float>*
  %ym = load <4 x float>* %py2, align 4

  ; Accumulate x*xm + y*ym.
  %a1 = fmul <4 x float> %x, %xm
  %a2 = fmul <4 x float> %y, %ym
  %a3 = fadd <4 x float> %a1, %a2
  ; Drop lane 3 to produce the float3 result.
  %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %a4
}
70
; rsMatrixMultiply(rs_matrix3x3 *m, float2 in) -> float3
; Non-const-pointer overload; forwards unchanged to the const-pointer variant.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  %product = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind
  ret <3 x float> %product
}
75
; rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) -> float4
;
; Returns in.x * m[0..3] + in.y * m[4..7] + in.z * m[8..11] + in.w * m[12..15],
; where m[i] is the i-th float of the matrix array.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  ; Broadcast each input component across a 4-wide vector.
  %x0 = extractelement <4 x float> %in, i32 0
  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
  %y0 = extractelement <4 x float> %in, i32 1
  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
  %z0 = extractelement <4 x float> %in, i32 2
  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
  %w0 = extractelement <4 x float> %in, i32 3
  %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone

  ; Load the four groups of four floats. Explicit "align 4": the struct
  ; contains only floats, so %m is only known to be 4-byte aligned; without it
  ; the loads would assume the (larger) ABI alignment of <4 x float>.
  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %px2 = bitcast float* %px to <4 x float>*
  %xm = load <4 x float>* %px2, align 4
  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %py2 = bitcast float* %py to <4 x float>*
  %ym = load <4 x float>* %py2, align 4
  %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %pz2 = bitcast float* %pz to <4 x float>*
  %zm = load <4 x float>* %pz2, align 4
  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %pw2 = bitcast float* %pw to <4 x float>*
  %wm = load <4 x float>* %pw2, align 4

  ; Accumulate x*xm + y*ym + z*zm + w*wm.
  %a1 = fmul <4 x float> %x, %xm
  %a2 = fmul <4 x float> %y, %ym
  %a3 = fadd <4 x float> %a1, %a2
  %a4 = fmul <4 x float> %z, %zm
  %a5 = fadd <4 x float> %a3, %a4
  %a6 = fmul <4 x float> %w, %wm
  %a7 = fadd <4 x float> %a5, %a6
  ret <4 x float> %a7
}
108
; rsMatrixMultiply(rs_matrix4x4 *m, float4 in) -> float4
; Non-const-pointer overload; forwards unchanged to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  %product = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind
  ret <4 x float> %product
}
113
; rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) -> float4
;
; Returns m[12..15] + in.x * m[0..3] + in.y * m[4..7] + in.z * m[8..11], where
; m[i] is the i-th float of the matrix array. The last group of four floats is
; added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  ; Broadcast each input component across a 4-wide vector.
  %x0 = extractelement <3 x float> %in, i32 0
  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
  %y0 = extractelement <3 x float> %in, i32 1
  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
  %z0 = extractelement <3 x float> %in, i32 2
  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone

  ; Load the four groups of four floats. Explicit "align 4": the struct
  ; contains only floats, so %m is only known to be 4-byte aligned; without it
  ; the loads would assume the (larger) ABI alignment of <4 x float>.
  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %px2 = bitcast float* %px to <4 x float>*
  %xm = load <4 x float>* %px2, align 4
  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %py2 = bitcast float* %py to <4 x float>*
  %ym = load <4 x float>* %py2, align 4
  %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %pz2 = bitcast float* %pz to <4 x float>*
  %zm = load <4 x float>* %pz2, align 4
  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %pw2 = bitcast float* %pw to <4 x float>*
  %wm = load <4 x float>* %pw2, align 4

  ; Accumulate wm + x*xm + y*ym + z*zm.
  %a1 = fmul <4 x float> %x, %xm
  %a2 = fadd <4 x float> %wm, %a1
  %a3 = fmul <4 x float> %y, %ym
  %a4 = fadd <4 x float> %a2, %a3
  %a5 = fmul <4 x float> %z, %zm
  %a6 = fadd <4 x float> %a4, %a5
  ret <4 x float> %a6
}
143
; rsMatrixMultiply(rs_matrix4x4 *m, float3 in) -> float4
; Non-const-pointer overload; forwards unchanged to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  %product = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind
  ret <4 x float> %product
}
148
; rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) -> float4
;
; Returns m[12..15] + in.x * m[0..3] + in.y * m[4..7], where m[i] is the i-th
; float of the matrix array. Elements 8-11 are not read; the last group of four
; floats is added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  ; Broadcast each input component across a 4-wide vector.
  %x0 = extractelement <2 x float> %in, i32 0
  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
  %y0 = extractelement <2 x float> %in, i32 1
  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone

  ; Load the three groups that are used. Explicit "align 4": the struct
  ; contains only floats, so %m is only known to be 4-byte aligned; without it
  ; the loads would assume the (larger) ABI alignment of <4 x float>.
  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %px2 = bitcast float* %px to <4 x float>*
  %xm = load <4 x float>* %px2, align 4
  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %py2 = bitcast float* %py to <4 x float>*
  %ym = load <4 x float>* %py2, align 4
  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %pw2 = bitcast float* %pw to <4 x float>*
  %wm = load <4 x float>* %pw2, align 4

  ; Accumulate wm + x*xm + y*ym.
  %a1 = fmul <4 x float> %x, %xm
  %a2 = fadd <4 x float> %wm, %a1
  %a3 = fmul <4 x float> %y, %ym
  %a4 = fadd <4 x float> %a2, %a3
  ret <4 x float> %a4
}
171
; rsMatrixMultiply(rs_matrix4x4 *m, float2 in) -> float4
; Non-const-pointer overload; forwards unchanged to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  %product = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind
  ret <4 x float> %product
}
176