; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512
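
; Tests for building MMX vectors (v2i32, v4i16, v8i8 and v2f32) from scalar
; elements via insertelement + bitcast to x86_mmx, including inputs that
; contain zero and undef elements.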

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

;
; v2i32
;

define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT: paddd %mm1, %mm1
; X86-NEXT: movq %mm1, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v2i32_01:
; X64: # %bb.0:
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: movd %esi, %mm1
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v2i32_0z:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2i32_u1:
; X64: # %bb.0:
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pxor %mm1, %mm1
; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT: paddd %mm1, %mm1
; X86-NEXT: movq %mm1, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v2i32_z1:
; X64: # %bb.0:
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x i32> undef, i32 0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2i32_00:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

;
; v4i16
;

define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT: punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT: punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT: paddd %mm2, %mm2
; X86-NEXT: movq %mm2, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v4i16_0123:
; X64: # %bb.0:
; X64-NEXT: movd %r8d, %mm0
; X64-NEXT: movd %ecx, %mm1
; X64-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: movd %esi, %mm2
; X64-NEXT: punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT: punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT: paddd %mm2, %mm2
; X64-NEXT: movq %mm2, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT: pxor %mm0, %mm0
; X86-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT: paddd %mm1, %mm1
; X86-NEXT: movq %mm1, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v4i16_01zz:
; X64: # %bb.0:
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: movd %esi, %mm1
; X64-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT: pxor %mm0, %mm0
; X64-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 0, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd %eax, %mm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movd %eax, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 0, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT: paddd %mm2, %mm2
; X86-NEXT: movq %mm2, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v4i16_012u:
; X64: # %bb.0:
; X64-NEXT: movd %ecx, %mm0
; X64-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT: movd %edx, %mm1
; X64-NEXT: movd %esi, %mm2
; X64-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT: paddd %mm2, %mm2
; X64-NEXT: movq %mm2, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v4i16_0u00:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a0, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

;
; v8i8
;

define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT: punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT: punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT: punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT: punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT: paddd %mm3, %mm3
; X86-NEXT: movq %mm3, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v8i8_01234567:
; X64: # %bb.0:
; X64-NEXT: movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT: movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: movd %r9d, %mm0
; X64-NEXT: movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT: punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT: movd %r8d, %mm1
; X64-NEXT: movd %ecx, %mm2
; X64-NEXT: punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT: movd %edx, %mm1
; X64-NEXT: movd %esi, %mm3
; X64-NEXT: punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT: punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT: punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT: paddd %mm3, %mm3
; X64-NEXT: movq %mm3, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a6, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pxor %mm1, %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT: punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT: punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64: # %bb.0:
; X64-NEXT: movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: movd %r9d, %mm0
; X64-NEXT: movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT: punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT: movd %r8d, %mm1
; X64-NEXT: movd %ecx, %mm2
; X64-NEXT: punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT: movd %esi, %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT: punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT: pxor %mm0, %mm0
; X86-NEXT: pxor %mm1, %mm1
; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT: paddd %mm2, %mm2
; X86-NEXT: movq %mm2, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64: # %bb.0:
; X64-NEXT: movd %r8d, %mm0
; X64-NEXT: movd %ecx, %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: movd %edx, %mm0
; X64-NEXT: movd %esi, %mm2
; X64-NEXT: punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT: pxor %mm0, %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT: paddd %mm2, %mm2
; X64-NEXT: movq %mm2, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd %eax, %mm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 0, i32 1
  %3 = insertelement <8 x i8> %2, i8 0, i32 2
  %4 = insertelement <8 x i8> %3, i8 0, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT: punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v8i8_00000000:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %mm0
; X64-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a0, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a0, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a0, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a0, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a0, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

;
; v2f32
;

define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT: paddd %mm1, %mm1
; X86-MMX-NEXT: movq %mm1, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm1, %mm0
; X86-SSE-NEXT: movdq2q %xmm0, %mm1
; X86-SSE-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT: paddd %mm1, %mm1
; X86-SSE-NEXT: movq %mm1, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2f32_01:
; X64: # %bb.0:
; X64-NEXT: movdq2q %xmm1, %mm0
; X64-NEXT: movdq2q %xmm0, %mm1
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: pxor %mm0, %mm0
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT: paddd %mm1, %mm1
; X86-MMX-NEXT: movq %mm1, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm0, %mm0
; X86-SSE-NEXT: pxor %mm1, %mm1
; X86-SSE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2f32_0z:
; X64: # %bb.0:
; X64-NEXT: movdq2q %xmm0, %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm0, %mm0
; X86-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2f32_u1:
; X64: # %bb.0:
; X64-NEXT: movdq2q %xmm1, %mm0
; X64-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: pxor %mm1, %mm1
; X86-MMX-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT: paddd %mm1, %mm1
; X86-MMX-NEXT: movq %mm1, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm0, %mm0
; X86-SSE-NEXT: pxor %mm1, %mm1
; X86-SSE-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT: paddd %mm1, %mm1
; X86-SSE-NEXT: movq %mm1, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2f32_z1:
; X64: # %bb.0:
; X64-NEXT: movdq2q %xmm1, %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX: # %bb.0:
; X86-MMX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT: punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT: paddd %mm0, %mm0
; X86-MMX-NEXT: movq %mm0, (%eax)
; X86-MMX-NEXT: retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm0, %mm0
; X86-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT: paddd %mm0, %mm0
; X86-SSE-NEXT: movq %mm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-LABEL: build_v2f32_00:
; X64: # %bb.0:
; X64-NEXT: movdq2q %xmm0, %mm0
; X64-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}