; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
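; Checks that calls to the MMX, SSE, and SSSE3 MMX-operand intrinsics below are
; lowered to the corresponding x86 instructions named in the CHECK lines.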

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}