; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2

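; A variable-index insert into an undef vector only has to produce the scalar
; in the selected lane; every other lane is undef, so the insert can be
; lowered as a splat (broadcast) of the scalar.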
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE:         # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pshufb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v16i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v16i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    retq
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @arg_i16_v8i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16:
; SSE:         # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v8i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v8i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @arg_i32_v4i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32:
; SSE:         # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v4i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v4i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    retq
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @arg_i64_v2i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64:
; SSE:         # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v2i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v2i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

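; FP scalars already arrive in %xmm0, so no GPR-to-vector move is needed
; before the splat.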
define <4 x float> @arg_f32_v4f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v4f32:
; SSE:         # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v4f32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v4f32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX2-NEXT:    retq
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @arg_f64_v2f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v2f64:
; SSE:         # %bb.0:
; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_f64_v2f64:
; AVX:         # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

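; Repeat the 128-bit tests with the scalar loaded from memory. Where a
; broadcast-from-memory instruction exists (vbroadcastss on AVX1, the
; vpbroadcast* family on AVX2), the load folds into the broadcast.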
define <16 x i8> @load_i8_v16i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v16i8:
; SSE:         # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pshufb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i8_v16i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v16i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16:
; SSE:         # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v8i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v8i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

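; AVX1 has no integer broadcast instructions, but a 32-bit splat has the same
; bit pattern in either domain, so vbroadcastss covers both AVX1 and AVX2.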
define <4 x i32> @load_i32_v4i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32:
; SSE:         # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v4i32:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64:
; SSE:         # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i64_v2i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i64_v2i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @load_f32_v4f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v4f32:
; SSE:         # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v4f32:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @load_f64_v2f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v2f64:
; SSE:         # %bb.0:
; SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v2f64:
; AVX:         # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

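; 256-bit vectors. Without AVX, a variable insert goes through the stack:
; align and reserve a 32-byte slot, mask the index so the store stays in
; bounds, store the scalar, and reload both halves. AVX1 splats within a
; 128-bit register and duplicates it with vinsertf128; AVX2 broadcasts
; directly to the ymm register.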
define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %dil, (%rsp,%rsi)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v32i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v32i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %di, (%rsp,%rsi,2)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v16i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v16i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %edi, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v8i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v8i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rdi, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v4i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v4i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm0, (%rsp,%rdi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v8f32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v8f32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm0, (%rsp,%rdi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v4f64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v4f64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}

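; 256-bit vectors with a loaded scalar: SSE still goes through the stack,
; AVX1 splats a 128-bit half and duplicates it, and the load folds into a
; ymm broadcast wherever vbroadcastss/sd (AVX1) or vpbroadcast* (AVX2) is
; available.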
define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, (%rsp,%rsi)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i8_v32i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v32i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, (%rsp,%rsi,2)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v16i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v16i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @load_i32_v8i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v8i32:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v4i64:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm0, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v8f32:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64:
; SSE:         # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm0, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v4f64:
; AVX:         # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}