; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
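;
; These tests check how chains of consecutive constant scalar stores are
; merged into wider (vector) stores when AVX is available.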
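; Four adjacent i32 stores of nonzero constants should merge into a single
; 16-byte vector store. A rough C equivalent of the IR below (hypothetical
; source, shown for illustration only):
;
;   void big_nonzero_16_bytes(int *a) {
;     a[0] = 1; a[1] = 2; a[2] = 3; a[3] = 4;
;   }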
define void @big_nonzero_16_bytes(i32* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X64-NEXT:    vmovups %xmm0, (%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3

  store i32 1, i32* %a, align 4
  store i32 2, i32* %arrayidx1, align 4
  store i32 3, i32* %arrayidx2, align 4
  store i32 4, i32* %arrayidx3, align 4
  ret void
}

; TODO: We assumed that two 64-bit stores were better than one vector load
; plus one vector store. But if the 64-bit constants can't be represented as
; sign-extended 32-bit constants, then each constant must first be
; materialized with a separate movabsq, so the scalar sequence takes extra
; instructions.

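; A rough C equivalent of the IR below (hypothetical source, shown for
; illustration only):
;
;   void big_nonzero_16_bytes_big64bit_constants(long long *a) {
;     a[0] = 0x100000001LL; /* 4294967297  */
;     a[1] = 0x300000001LL; /* 12884901889 */
;   }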
define void @big_nonzero_16_bytes_big64bit_constants(i64* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,3]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $4294967297, %rax # imm = 0x100000001
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    movabsq $12884901889, %rax # imm = 0x300000001
; X64-NEXT:    movq %rax, 8(%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1

  store i64 4294967297, i64* %a
  store i64 12884901889, i64* %arrayidx1
  ret void
}

; Splats may be an opportunity to use a broadcast op.

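; A rough C equivalent of the IR below (hypothetical source, shown for
; illustration only):
;
;   void big_nonzero_32_bytes_splat(int *a) {
;     for (int i = 0; i < 8; ++i) /* assumed fully unrolled */
;       a[i] = 42;
;   }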
define void @big_nonzero_32_bytes_splat(i32* nocapture %a) {
; X32-LABEL: big_nonzero_32_bytes_splat:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_32_bytes_splat:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 4
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 5
  %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 6
  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 7

  store i32 42, i32* %a, align 4
  store i32 42, i32* %arrayidx1, align 4
  store i32 42, i32* %arrayidx2, align 4
  store i32 42, i32* %arrayidx3, align 4
  store i32 42, i32* %arrayidx4, align 4
  store i32 42, i32* %arrayidx5, align 4
  store i32 42, i32* %arrayidx6, align 4
  store i32 42, i32* %arrayidx7, align 4
  ret void
}

; Verify that we choose the best-sized store(s) for each chunk.

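; A rough C equivalent of the IR below (hypothetical source, shown for
; illustration only):
;
;   void big_nonzero_63_bytes(char *p) {
;     long long *a8 = (long long *)p;
;     a8[0] = 1; a8[1] = 2; a8[2] = 3; a8[3] = 4;
;     a8[4] = 5; a8[5] = 6; a8[6] = 7;
;     ((int *)p)[14] = 8;   /* bytes 56-59 */
;     ((short *)p)[30] = 9; /* bytes 60-61 */
;     p[62] = 10;           /* byte 62     */
;   }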
define void @big_nonzero_63_bytes(i8* nocapture %a) {
; X32-LABEL: big_nonzero_63_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [1,0,2,0,3,0,4,0]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [5,0,6,0]
; X32-NEXT:    vmovups %xmm0, 32(%eax)
; X32-NEXT:    movl $0, 52(%eax)
; X32-NEXT:    movl $7, 48(%eax)
; X32-NEXT:    movl $8, 56(%eax)
; X32-NEXT:    movw $9, 60(%eax)
; X32-NEXT:    movb $10, 62(%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_63_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,3,4]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    movq $5, 32(%rdi)
; X64-NEXT:    movq $6, 40(%rdi)
; X64-NEXT:    movq $7, 48(%rdi)
; X64-NEXT:    movl $8, 56(%rdi)
; X64-NEXT:    movw $9, 60(%rdi)
; X64-NEXT:    movb $10, 62(%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %a8 = bitcast i8* %a to i64*
  %arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
  %arrayidx16 = getelementptr inbounds i64, i64* %a8, i64 2
  %arrayidx24 = getelementptr inbounds i64, i64* %a8, i64 3
  %arrayidx32 = getelementptr inbounds i64, i64* %a8, i64 4
  %arrayidx40 = getelementptr inbounds i64, i64* %a8, i64 5
  %arrayidx48 = getelementptr inbounds i64, i64* %a8, i64 6
  %a4 = bitcast i8* %a to i32*
  %arrayidx56 = getelementptr inbounds i32, i32* %a4, i64 14
  %a2 = bitcast i8* %a to i16*
  %arrayidx60 = getelementptr inbounds i16, i16* %a2, i64 30
  %arrayidx62 = getelementptr inbounds i8, i8* %a, i64 62

  store i64 1, i64* %a8
  store i64 2, i64* %arrayidx8
  store i64 3, i64* %arrayidx16
  store i64 4, i64* %arrayidx24
  store i64 5, i64* %arrayidx32
  store i64 6, i64* %arrayidx40
  store i64 7, i64* %arrayidx48
  store i32 8, i32* %arrayidx56
  store i16 9, i16* %arrayidx60
  store i8 10, i8* %arrayidx62
  ret void
}