; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2

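; A variable-index insert into an undef vector can be lowered as a splat of the
; scalar: every other lane is undef, so broadcasting the value to all lanes is a
; valid refinement and avoids a variable-index stack store/reload.
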
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pshufb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i8_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i8_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @arg_i16_v8i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i16_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i16_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @arg_i32_v4i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i32_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i32_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: retq
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @arg_i64_v2i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i64_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i64_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @arg_f32_v4f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_f32_v4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_f32_v4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
; AVX2-NEXT: retq
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @arg_f64_v2f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: arg_f64_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

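; The same splat lowering applies when the scalar comes from memory; with AVX2
; (and already with AVX1 for 32-bit and 64-bit elements) the load folds directly
; into the broadcast instruction.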
define <16 x i8> @load_i8_v16i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pshufb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: load_i8_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_i8_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT: retq
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: load_i16_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_i16_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT: retq
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @load_i32_v4i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: load_i32_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss (%rdi), %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: load_i64_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @load_f32_v4f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: load_f32_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss (%rdi), %xmm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @load_f64_v2f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: load_f64_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

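; For 256-bit vectors the SSE lowering has no ymm registers: the element is
; stored to a 32-byte-aligned stack slot and the result is reloaded as two xmm
; halves. AVX1 splats within an xmm and widens with vinsertf128; AVX2 broadcasts
; straight into a ymm.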
define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: andl $31, %esi
; SSE-NEXT: movb %dil, (%rsp,%rsi)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i8_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i8_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: andl $15, %esi
; SSE-NEXT: movw %di, (%rsp,%rsi,2)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i16_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i16_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: andl $7, %esi
; SSE-NEXT: movl %edi, (%rsp,%rsi,4)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i32_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i32_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: andl $3, %esi
; SSE-NEXT: movq %rdi, (%rsp,%rsi,8)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_i64_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_i64_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $edi killed $edi def $rdi
; SSE-NEXT: andl $7, %edi
; SSE-NEXT: movss %xmm0, (%rsp,%rdi,4)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_f32_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_f32_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $edi killed $edi def $rdi
; SSE-NEXT: andl $3, %edi
; SSE-NEXT: movsd %xmm0, (%rsp,%rdi,8)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: arg_f64_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}

define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movb (%rdi), %al
; SSE-NEXT: andl $31, %esi
; SSE-NEXT: movb %al, (%rsp,%rsi)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: load_i8_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_i8_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT: retq
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: andl $15, %esi
; SSE-NEXT: movw %ax, (%rsp,%rsi,2)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX1-LABEL: load_i16_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_i16_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT: retq
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @load_i32_v8i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: andl $7, %esi
; SSE-NEXT: movl %eax, (%rsp,%rsi,4)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: load_i32_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss (%rdi), %ymm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: andl $3, %esi
; SSE-NEXT: movq %rax, (%rsp,%rsi,8)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: load_i64_v4i64:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: andl $7, %esi
; SSE-NEXT: movss %xmm0, (%rsp,%rsi,4)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: load_f32_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss (%rdi), %ymm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: movq %rsp, %rbp
; SSE-NEXT: andq $-32, %rsp
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: andl $3, %esi
; SSE-NEXT: movsd %xmm0, (%rsp,%rsi,8)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: load_f64_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX-NEXT: retq
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}