; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2

; Insert a scalar i8 arg into an undef <16 x i8> at a variable index.
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movb %dil, -24(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i8_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $15, %esi
; AVX-NEXT:    movb %dil, -24(%rsp,%rsi)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

; Insert a scalar i16 arg into an undef <8 x i16> at a variable index.
define <8 x i16> @arg_i16_v8i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movw %di, -24(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i16_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $7, %esi
; AVX-NEXT:    movw %di, -24(%rsp,%rsi,2)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

; Insert a scalar i32 arg into an undef <4 x i32> at a variable index.
define <4 x i32> @arg_i32_v4i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i32_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

; Insert a scalar i64 arg into an undef <2 x i64> at a variable index.
define <2 x i64> @arg_i64_v2i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i64_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $1, %esi
; AVX-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

; Insert a scalar float arg into an undef <4 x float> at a variable index.
define <4 x float> @arg_f32_v4f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movss %xmm0, -24(%rsp,%rdi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_f32_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX-NEXT:    andl $3, %edi
; AVX-NEXT:    vmovss %xmm0, -24(%rsp,%rdi,4)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

; Insert a scalar double arg into an undef <2 x double> at a variable index.
define <2 x double> @arg_f64_v2f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $1, %edi
; SSE-NEXT:    movsd %xmm0, -24(%rsp,%rdi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_f64_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX-NEXT:    andl $1, %edi
; AVX-NEXT:    vmovsd %xmm0, -24(%rsp,%rdi,8)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

; Insert a loaded i8 into an undef <16 x i8> at a variable index.
define <16 x i8> @load_i8_v16i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movb %al, -24(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i8_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movb (%rdi), %al
; AVX-NEXT:    andl $15, %esi
; AVX-NEXT:    movb %al, -24(%rsp,%rsi)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

; Insert a loaded i16 into an undef <8 x i16> at a variable index.
define <8 x i16> @load_i16_v8i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movw %ax, -24(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i16_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    andl $7, %esi
; AVX-NEXT:    movw %ax, -24(%rsp,%rsi,2)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

; Insert a loaded i32 into an undef <4 x i32> at a variable index.
define <4 x i32> @load_i32_v4i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movl %eax, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    movl %eax, -24(%rsp,%rsi,4)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

; Insert a loaded i64 into an undef <2 x i64> at a variable index.
define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movq %rax, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    andl $1, %esi
; AVX-NEXT:    movq %rax, -24(%rsp,%rsi,8)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

; Insert a loaded float into an undef <4 x float> at a variable index.
define <4 x float> @load_f32_v4f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movss %xmm0, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    vmovss %xmm0, -24(%rsp,%rsi,4)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

; Insert a loaded double into an undef <2 x double> at a variable index.
define <2 x double> @load_f64_v2f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movsd %xmm0, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    andl $1, %esi
; AVX-NEXT:    vmovsd %xmm0, -24(%rsp,%rsi,8)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

; Insert a scalar i8 arg into an undef <32 x i8> at a variable index (256-bit; needs an aligned stack frame).
define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %dil, (%rsp,%rsi)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i8_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $31, %esi
; AVX-NEXT:    movb %dil, (%rsp,%rsi)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

; Insert a scalar i16 arg into an undef <16 x i16> at a variable index (256-bit).
define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %di, (%rsp,%rsi,2)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i16_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $15, %esi
; AVX-NEXT:    movw %di, (%rsp,%rsi,2)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

; Insert a scalar i32 arg into an undef <8 x i32> at a variable index (256-bit).
define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %edi, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i32_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $7, %esi
; AVX-NEXT:    movl %edi, (%rsp,%rsi,4)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

; Insert a scalar i64 arg into an undef <4 x i64> at a variable index (256-bit).
define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rdi, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_i64_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    movq %rdi, (%rsp,%rsi,8)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

; Insert a scalar float arg into an undef <8 x float> at a variable index (256-bit).
define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm0, (%rsp,%rdi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_f32_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX-NEXT:    andl $7, %edi
; AVX-NEXT:    vmovss %xmm0, (%rsp,%rdi,4)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

; Insert a scalar double arg into an undef <4 x double> at a variable index (256-bit).
define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm0, (%rsp,%rdi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: arg_f64_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX-NEXT:    andl $3, %edi
; AVX-NEXT:    vmovsd %xmm0, (%rsp,%rdi,8)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}

; Insert a loaded i8 into an undef <32 x i8> at a variable index (256-bit).
define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, (%rsp,%rsi)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i8_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movb (%rdi), %al
; AVX-NEXT:    andl $31, %esi
; AVX-NEXT:    movb %al, (%rsp,%rsi)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

; Insert a loaded i16 into an undef <16 x i16> at a variable index (256-bit).
define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, (%rsp,%rsi,2)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i16_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    andl $15, %esi
; AVX-NEXT:    movw %ax, (%rsp,%rsi,2)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

; Insert a loaded i32 into an undef <8 x i32> at a variable index (256-bit).
define <8 x i32> @load_i32_v8i32(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    andl $7, %esi
; AVX-NEXT:    movl %eax, (%rsp,%rsi,4)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

; Insert a loaded i64 into an undef <4 x i64> at a variable index (256-bit).
define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    movq %rax, (%rsp,%rsi,8)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

; Insert a loaded float into an undef <8 x float> at a variable index (256-bit).
define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm0, (%rsp,%rsi,4)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    andl $7, %esi
; AVX-NEXT:    vmovss %xmm0, (%rsp,%rsi,4)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

; Insert a loaded double into an undef <4 x double> at a variable index (256-bit).
define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    movq %rsp, %rbp
; SSE-NEXT:    andq $-32, %rsp
; SSE-NEXT:    subq $64, %rsp
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm0, (%rsp,%rsi,8)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movq %rbp, %rsp
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    andq $-32, %rsp
; AVX-NEXT:    subq $64, %rsp
; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    andl $3, %esi
; AVX-NEXT:    vmovsd %xmm0, (%rsp,%rsi,8)
; AVX-NEXT:    vmovaps (%rsp), %ymm0
; AVX-NEXT:    movq %rbp, %rsp
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}
