blob: 5f1f4fd8f76d024dd310c765a477c44b635a9379 [file] [log] [blame]
; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX
3
; haddpd1: %a = <x0, y0> and %b = <x1, y1>, so the fadd computes
; <x0+x1, y0+y1>, the pairwise sums of adjacent elements of %x and %y.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddpd" would also match inside "vhaddpd" and make the -NOT line vacuous.
; SSE3: haddpd1:
; SSE3-NOT: vhaddpd
; SSE3: {{[[:space:]]}}haddpd
; AVX: haddpd1:
; AVX: vhaddpd
define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}
15
; haddpd2: commuted variant. %a = <x1, y0>; the second shuffle swaps its
; operands, giving %b = <x0, y1>. The fadd is therefore <x1+x0, y0+y1>, the
; same pairwise sums as haddpd1 with the lane-0 addends exchanged.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddpd" would also match inside "vhaddpd" and make the -NOT line vacuous.
; SSE3: haddpd2:
; SSE3-NOT: vhaddpd
; SSE3: {{[[:space:]]}}haddpd
; AVX: haddpd2:
; AVX: vhaddpd
define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}
27
; haddpd3: single-input variant. Lane 0 of the result is x0+x1; lane 1 of both
; shuffles is undef, so the upper result lane is unconstrained.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddpd" would also match inside "vhaddpd" and make the -NOT line vacuous.
; SSE3: haddpd3:
; SSE3-NOT: vhaddpd
; SSE3: {{[[:space:]]}}haddpd
; AVX: haddpd3:
; AVX: vhaddpd
define <2 x double> @haddpd3(<2 x double> %x) {
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}
39
; haddps1: %a = <x0, x2, y0, y2> and %b = <x1, x3, y1, y3>, so the fadd
; computes <x0+x1, x2+x3, y0+y1, y2+y3>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps1:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps1:
; AVX: vhaddps
define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
51
; haddps2: commuted variant. %a = <x1, x2, y1, y2>; the second shuffle swaps
; its operands, giving %b = <x0, x3, y0, y3>. The fadd is
; <x1+x0, x2+x3, y1+y0, y2+y3>, i.e. the haddps1 sums with some addends swapped.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps2:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps2:
; AVX: vhaddps
define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
63
; haddps3: single-input variant. Lane 0 is undef in both masks and mask
; indices 4..7 select from the undef second operand, so only lane 1 of the
; result (x2+x3) is defined.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps3:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps3:
; AVX: vhaddps
define <4 x float> @haddps3(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
75
; haddps4: single-input variant with the upper two lanes undef. The result is
; <x0+x1, x2+x3, undef, undef>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps4:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps4:
; AVX: vhaddps
define <4 x float> @haddps4(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
87
; haddps5: like haddps4 but with the lane-1 addends swapped between the two
; shuffles. The result is <x0+x1, x3+x2, undef, undef>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps5:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps5:
; AVX: vhaddps
define <4 x float> @haddps5(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
99
; haddps6: only lane 0 of the result is defined (x0+x1); lanes 1..3 of both
; shuffles are undef.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps6:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps6:
; AVX: vhaddps
define <4 x float> @haddps6(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
111
; haddps7: only lane 1 of the result is defined, and its addends appear in
; swapped order (x3+x2); every other lane of both shuffles is undef.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: haddps7:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; AVX: haddps7:
; AVX: vhaddps
define <4 x float> @haddps7(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}
123
; hsubpd1: %a = <x0, y0> and %b = <x1, y1>, so the fsub computes
; <x0-x1, y0-y1>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubpd" would also match inside "vhsubpd" and make the -NOT line vacuous.
; SSE3: hsubpd1:
; SSE3-NOT: vhsubpd
; SSE3: {{[[:space:]]}}hsubpd
; AVX: hsubpd1:
; AVX: vhsubpd
define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}
135
; hsubpd2: single-input variant. Lane 0 of the result is x0-x1; lane 1 of both
; shuffles is undef.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubpd" would also match inside "vhsubpd" and make the -NOT line vacuous.
; SSE3: hsubpd2:
; SSE3-NOT: vhsubpd
; SSE3: {{[[:space:]]}}hsubpd
; AVX: hsubpd2:
; AVX: vhsubpd
define <2 x double> @hsubpd2(<2 x double> %x) {
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}
147
; hsubps1: %a = <x0, x2, y0, y2> and %b = <x1, x3, y1, y3>, so the fsub
; computes <x0-x1, x2-x3, y0-y1, y2-y3>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: hsubps1:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: hsubps1:
; AVX: vhsubps
define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}
159
; hsubps2: single-input variant. Lane 0 is undef in both masks and mask
; indices 4..7 select from the undef second operand, so only lane 1 of the
; result (x2-x3) is defined.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: hsubps2:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: hsubps2:
; AVX: vhsubps
define <4 x float> @hsubps2(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}
171
; hsubps3: single-input variant with the upper two lanes undef. The result is
; <x0-x1, x2-x3, undef, undef>.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: hsubps3:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: hsubps3:
; AVX: vhsubps
define <4 x float> @hsubps3(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}
183
; hsubps4: only lane 0 of the result is defined (x0-x1); lanes 1..3 of both
; shuffles are undef.
; The legacy-mnemonic check is anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: hsubps4:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: hsubps4:
; AVX: vhsubps
define <4 x float> @hsubps4(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}
Craig Topper138a5c62011-12-02 07:16:01 +0000195
; vhaddps1: 8-element variant. Within each group of four result lanes the
; fadd computes two adjacent-pair sums from %x followed by two from %y:
; <x0+x1, x2+x3, y0+y1, y2+y3, x4+x5, x6+x7, y4+y5, y6+y7>.
; The checks expect two legacy haddps instructions in the first llc
; invocation and one vhaddps in the second.
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: vhaddps1:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; SSE3: {{[[:space:]]}}haddps
; AVX: vhaddps1:
; AVX: vhaddps
define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}
208
; vhaddps2: commuted 8-element variant. %a = <x1, x2, y1, y2, x5, x6, y5, y6>;
; the second shuffle swaps its operands, giving
; %b = <x0, x3, y0, y3, x4, x7, y4, y7>. The fadd yields the same adjacent-pair
; sums as vhaddps1 with some addends exchanged.
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: vhaddps2:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; SSE3: {{[[:space:]]}}haddps
; AVX: vhaddps2:
; AVX: vhaddps
define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
  %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}
221
; vhaddps3: single-input 8-element variant with partially undef masks. Mask
; indices 8..15 select from the undef second operand, so the defined result
; lanes are lane 1 (x2+x3), lane 4 (x4+x5) and lane 5 (x6+x7).
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "haddps" would also match inside "vhaddps" and make the -NOT line vacuous.
; SSE3: vhaddps3:
; SSE3-NOT: vhaddps
; SSE3: {{[[:space:]]}}haddps
; SSE3: {{[[:space:]]}}haddps
; AVX: vhaddps3:
; AVX: vhaddps
define <8 x float> @vhaddps3(<8 x float> %x) {
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}
234
; vhsubps1: 8-element variant. The fsub computes
; <x0-x1, x2-x3, y0-y1, y2-y3, x4-x5, x6-x7, y4-y5, y6-y7>.
; The checks expect two legacy hsubps instructions in the first llc
; invocation and one vhsubps in the second.
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: vhsubps1:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: vhsubps1:
; AVX: vhsubps
define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}
247
; vhsubps3: single-input 8-element variant with partially undef masks. Mask
; indices 8..15 select from the undef second operand, so the defined result
; lanes are lane 1 (x2-x3), lane 4 (x4-x5) and lane 5 (x6-x7).
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "hsubps" would also match inside "vhsubps" and make the -NOT line vacuous.
; SSE3: vhsubps3:
; SSE3-NOT: vhsubps
; SSE3: {{[[:space:]]}}hsubps
; SSE3: {{[[:space:]]}}hsubps
; AVX: vhsubps3:
; AVX: vhsubps
define <8 x float> @vhsubps3(<8 x float> %x) {
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}
260
; vhaddpd1: 4-element double variant. %a = <x0, y0, x2, y2> and
; %b = <x1, y1, x3, y3>, so the fadd computes <x0+x1, y0+y1, x2+x3, y2+y3>.
; The checks expect two legacy haddpd instructions in the first llc
; invocation and one vhaddpd in the second.
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "haddpd" would also match inside "vhaddpd" and make the -NOT line vacuous.
; SSE3: vhaddpd1:
; SSE3-NOT: vhaddpd
; SSE3: {{[[:space:]]}}haddpd
; SSE3: {{[[:space:]]}}haddpd
; AVX: vhaddpd1:
; AVX: vhaddpd
define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fadd <4 x double> %a, %b
  ret <4 x double> %r
}
273
; vhsubpd1: 4-element double variant. %a = <x0, y0, x2, y2> and
; %b = <x1, y1, x3, y3>, so the fsub computes <x0-x1, y0-y1, x2-x3, y2-y3>.
; The checks expect two legacy hsubpd instructions in the first llc
; invocation and one vhsubpd in the second.
; Both legacy-mnemonic checks are anchored on leading whitespace because a bare
; "hsubpd" would also match inside "vhsubpd" and make the -NOT line vacuous.
; SSE3: vhsubpd1:
; SSE3-NOT: vhsubpd
; SSE3: {{[[:space:]]}}hsubpd
; SSE3: {{[[:space:]]}}hsubpd
; AVX: vhsubpd1:
; AVX: vhsubpd
define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fsub <4 x double> %a, %b
  ret <4 x double> %r
}