; blob: c56405d1ef0aad294dcce6ca86fb38b6fc5fe621
; NOTE(review): the listing this file came from carried what appears to be a
; blame annotation on its first line: Stephen Hines, 5a47020,
; 2013-05-29 15:36:18 -0700.

; Target description: little-endian with 32-bit pointers, for an ARMv7 Linux
; (gnueabi) target.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

; Matrix types. Each one wraps a single flat array of floats
; (4, 9 and 16 elements respectively).
%struct.rs_matrix2x2 = type { [4 x float] }
%struct.rs_matrix3x3 = type { [9 x float] }
%struct.rs_matrix4x4 = type { [16 x float] }
8
; Broadcast a scalar: returns a <4 x float> whose four lanes all hold %in.
; Internal linkage and alwaysinline, so it only exists as a helper for the
; functions in this module.
define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
  ; Place the scalar in lane 0, then replicate lane 0 across all four lanes
  ; with an all-zero shuffle mask.
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
16
17
; rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) -> float3
;
; With m[k] denoting element k of the matrix's float array, result lane i
; (i = 0..2) is
;     in.x * m[i] + in.y * m[3 + i] + in.z * m[6 + i]
; The arithmetic is carried out four lanes wide; lane 3 is scratch and is
; dropped before returning.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  ; m[0..3]: four-wide load starting at element 0 (lane 3 is not part of the result).
  %m0.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %m0.vptr = bitcast float* %m0.ptr to <4 x float>*
  %m0 = load <4 x float>* %m0.vptr, align 4

  ; m[3..6]: four-wide load starting at element 3 (lane 3 is not part of the result).
  %m3.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %m3.vptr = bitcast float* %m3.ptr to <4 x float>*
  %m3 = load <4 x float>* %m3.vptr, align 4

  ; m[6..8]: only three elements remain in the array, so load three lanes and
  ; widen to four. Mask index 3 selects from the undef operand, leaving lane 3
  ; undefined.
  %m6.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 6
  %m6.vptr = bitcast float* %m6.ptr to <3 x float>*
  %m6.v3 = load <3 x float>* %m6.vptr, align 4
  %m6 = shufflevector <3 x float> %m6.v3, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

  ; Broadcast each input component across a four-lane vector.
  %in.x = extractelement <3 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <3 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <3 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Scale and accumulate: z-term + (x-term + y-term).
  %term.x = fmul <4 x float> %xxxx, %m0
  %term.y = fmul <4 x float> %yyyy, %m3
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %m6
  %sum.xyz = fadd <4 x float> %term.z, %sum.xy

  ; Keep lanes 0..2 only.
  %result = shufflevector <4 x float> %sum.xyz, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
45
; rsMatrixMultiply(rs_matrix3x3 *m, float3 in) -> float3
; Non-const-pointer overload: forwards both arguments unchanged to the
; const-pointer overload and returns its result.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  %forwarded = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind
  ret <3 x float> %forwarded
}
50
; rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) -> float3
;
; With m[k] denoting element k of the matrix's float array, result lane i
; (i = 0..2) is
;     in.x * m[i] + in.y * m[3 + i]
; The arithmetic is carried out four lanes wide; lane 3 is scratch and is
; dropped before returning. No m[6 + i] term is added to the result.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  ; m[0..3]: four-wide load starting at element 0.
  %m0.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %m0.vptr = bitcast float* %m0.ptr to <4 x float>*
  %m0 = load <4 x float>* %m0.vptr, align 4

  ; m[3..6]: four-wide load starting at element 3.
  %m3.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %m3.vptr = bitcast float* %m3.ptr to <4 x float>*
  %m3 = load <4 x float>* %m3.vptr, align 4

  ; Broadcast each input component across a four-lane vector.
  %in.x = extractelement <2 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <2 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  ; Scale and accumulate: x-term + y-term.
  %term.x = fmul <4 x float> %xxxx, %m0
  %term.y = fmul <4 x float> %yyyy, %m3
  %sum.xy = fadd <4 x float> %term.x, %term.y

  ; Keep lanes 0..2 only.
  %result = shufflevector <4 x float> %sum.xy, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
70
; rsMatrixMultiply(rs_matrix3x3 *m, float2 in) -> float3
; Non-const-pointer overload: forwards both arguments unchanged to the
; const-pointer overload and returns its result.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  %forwarded = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind
  ret <3 x float> %forwarded
}
75
; rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) -> float4
;
; With m[k] denoting element k of the matrix's float array, result lane i
; (i = 0..3) is
;     in.x * m[i] + in.y * m[4 + i] + in.z * m[8 + i] + in.w * m[12 + i]
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  ; Load the matrix as four consecutive four-element groups.
  ; m[0..3]
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vptr = bitcast float* %m0.ptr to <4 x float>*
  %m0 = load <4 x float>* %m0.vptr, align 4
  ; m[4..7]
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vptr = bitcast float* %m4.ptr to <4 x float>*
  %m4 = load <4 x float>* %m4.vptr, align 4
  ; m[8..11]
  %m8.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %m8.vptr = bitcast float* %m8.ptr to <4 x float>*
  %m8 = load <4 x float>* %m8.vptr, align 4
  ; m[12..15]
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vptr = bitcast float* %m12.ptr to <4 x float>*
  %m12 = load <4 x float>* %m12.vptr, align 4

  ; Broadcast each input component across a four-lane vector.
  %in.x = extractelement <4 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <4 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <4 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone
  %in.w = extractelement <4 x float> %in, i32 3
  %wwww = tail call <4 x float> @smear_f(float %in.w) nounwind readnone

  ; Scale and accumulate left to right: ((x-term + y-term) + z-term) + w-term.
  %term.x = fmul <4 x float> %xxxx, %m0
  %term.y = fmul <4 x float> %yyyy, %m4
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %m8
  %sum.xyz = fadd <4 x float> %sum.xy, %term.z
  %term.w = fmul <4 x float> %wwww, %m12
  %result = fadd <4 x float> %sum.xyz, %term.w
  ret <4 x float> %result
}
108
; rsMatrixMultiply(rs_matrix4x4 *m, float4 in) -> float4
; Non-const-pointer overload: forwards both arguments unchanged to the
; const-pointer overload and returns its result.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  %forwarded = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind
  ret <4 x float> %forwarded
}
113
; rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) -> float4
;
; With m[k] denoting element k of the matrix's float array, result lane i
; (i = 0..3) is
;     m[12 + i] + in.x * m[i] + in.y * m[4 + i] + in.z * m[8 + i]
; The m[12..15] group is added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  ; Load the matrix as four consecutive four-element groups.
  ; m[0..3]
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vptr = bitcast float* %m0.ptr to <4 x float>*
  %m0 = load <4 x float>* %m0.vptr, align 4
  ; m[4..7]
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vptr = bitcast float* %m4.ptr to <4 x float>*
  %m4 = load <4 x float>* %m4.vptr, align 4
  ; m[8..11]
  %m8.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %m8.vptr = bitcast float* %m8.ptr to <4 x float>*
  %m8 = load <4 x float>* %m8.vptr, align 4
  ; m[12..15]
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vptr = bitcast float* %m12.ptr to <4 x float>*
  %m12 = load <4 x float>* %m12.vptr, align 4

  ; Broadcast each input component across a four-lane vector.
  %in.x = extractelement <3 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <3 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <3 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Start from m[12..15], then add the scaled groups in x, y, z order.
  %term.x = fmul <4 x float> %xxxx, %m0
  %acc.x = fadd <4 x float> %m12, %term.x
  %term.y = fmul <4 x float> %yyyy, %m4
  %acc.xy = fadd <4 x float> %acc.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %m8
  %result = fadd <4 x float> %acc.xy, %term.z
  ret <4 x float> %result
}
143
; rsMatrixMultiply(rs_matrix4x4 *m, float3 in) -> float4
; Non-const-pointer overload: forwards both arguments unchanged to the
; const-pointer overload and returns its result.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  %forwarded = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind
  ret <4 x float> %forwarded
}
148
; rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) -> float4
;
; With m[k] denoting element k of the matrix's float array, result lane i
; (i = 0..3) is
;     m[12 + i] + in.x * m[i] + in.y * m[4 + i]
; The m[12..15] group is added unscaled; m[8..11] is never read.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  ; Load the three four-element groups that take part in the result.
  ; m[0..3]
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vptr = bitcast float* %m0.ptr to <4 x float>*
  %m0 = load <4 x float>* %m0.vptr, align 4
  ; m[4..7]
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vptr = bitcast float* %m4.ptr to <4 x float>*
  %m4 = load <4 x float>* %m4.vptr, align 4
  ; m[12..15]
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vptr = bitcast float* %m12.ptr to <4 x float>*
  %m12 = load <4 x float>* %m12.vptr, align 4

  ; Broadcast each input component across a four-lane vector.
  %in.x = extractelement <2 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <2 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  ; Start from m[12..15], then add the scaled groups in x, y order.
  %term.x = fmul <4 x float> %xxxx, %m0
  %acc.x = fadd <4 x float> %m12, %term.x
  %term.y = fmul <4 x float> %yyyy, %m4
  %result = fadd <4 x float> %acc.x, %term.y
  ret <4 x float> %result
}
171
; rsMatrixMultiply(rs_matrix4x4 *m, float2 in) -> float4
; Non-const-pointer overload: forwards both arguments unchanged to the
; const-pointer overload and returns its result.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  %forwarded = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind
  ret <4 x float> %forwarded
}
176