; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64

; Four adjacent constant i32 stores: the CHECK lines show x64 merges them into
; two 64-bit immediate stores (materialized via movabsq), while i686 keeps four
; movl stores (no 64-bit GPR; no vector store chosen here).
define void @big_nonzero_16_bytes(i32* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl $1, (%eax)
; X32-NEXT:    movl $2, 4(%eax)
; X32-NEXT:    movl $3, 8(%eax)
; X32-NEXT:    movl $4, 12(%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes:
; X64:       # BB#0:
; X64-NEXT:    movabsq $8589934593, %rax # imm = 0x200000001
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    movabsq $17179869187, %rax # imm = 0x400000003
; X64-NEXT:    movq %rax, 8(%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3

  store i32 1, i32* %a, align 4
  store i32 2, i32* %arrayidx1, align 4
  store i32 3, i32* %arrayidx2, align 4
  store i32 4, i32* %arrayidx3, align 4
  ret void
}

; Splats may be an opportunity to use a broadcast op.

; Eight adjacent stores of the same constant (42): per the CHECK lines, x64
; currently merges pairs into four movq of the splatted 64-bit immediate, and
; i686 emits eight movl — neither uses a vector broadcast yet.
define void @big_nonzero_32_bytes_splat(i32* nocapture %a) {
; X32-LABEL: big_nonzero_32_bytes_splat:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl $42, (%eax)
; X32-NEXT:    movl $42, 4(%eax)
; X32-NEXT:    movl $42, 8(%eax)
; X32-NEXT:    movl $42, 12(%eax)
; X32-NEXT:    movl $42, 16(%eax)
; X32-NEXT:    movl $42, 20(%eax)
; X32-NEXT:    movl $42, 24(%eax)
; X32-NEXT:    movl $42, 28(%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_32_bytes_splat:
; X64:       # BB#0:
; X64-NEXT:    movabsq $180388626474, %rax # imm = 0x2A0000002A
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    movq %rax, 8(%rdi)
; X64-NEXT:    movq %rax, 16(%rdi)
; X64-NEXT:    movq %rax, 24(%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 4
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 5
  %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 6
  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 7

  store i32 42, i32* %a, align 4
  store i32 42, i32* %arrayidx1, align 4
  store i32 42, i32* %arrayidx2, align 4
  store i32 42, i32* %arrayidx3, align 4
  store i32 42, i32* %arrayidx4, align 4
  store i32 42, i32* %arrayidx5, align 4
  store i32 42, i32* %arrayidx6, align 4
  store i32 42, i32* %arrayidx7, align 4
  ret void
}

; Verify that we choose the best-sized store(s) for each chunk.

; 63 bytes of mixed-width constant stores (7 x i64, then i32/i16/i8 tail):
; the CHECK lines verify each chunk is stored with the widest legal size —
; movq/movl/movw/movb on x64, and movl pairs (value + zero half) on i686
; where each i64 must be split into two 32-bit stores.
define void @big_nonzero_63_bytes(i8* nocapture %a) {
; X32-LABEL: big_nonzero_63_bytes:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $1, (%eax)
; X32-NEXT:    movl $0, 12(%eax)
; X32-NEXT:    movl $2, 8(%eax)
; X32-NEXT:    movl $0, 20(%eax)
; X32-NEXT:    movl $3, 16(%eax)
; X32-NEXT:    movl $0, 28(%eax)
; X32-NEXT:    movl $4, 24(%eax)
; X32-NEXT:    movl $0, 36(%eax)
; X32-NEXT:    movl $5, 32(%eax)
; X32-NEXT:    movl $0, 44(%eax)
; X32-NEXT:    movl $6, 40(%eax)
; X32-NEXT:    movl $0, 52(%eax)
; X32-NEXT:    movl $7, 48(%eax)
; X32-NEXT:    movl $8, 56(%eax)
; X32-NEXT:    movw $9, 60(%eax)
; X32-NEXT:    movb $10, 62(%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_63_bytes:
; X64:       # BB#0:
; X64-NEXT:    movq $1, (%rdi)
; X64-NEXT:    movq $2, 8(%rdi)
; X64-NEXT:    movq $3, 16(%rdi)
; X64-NEXT:    movq $4, 24(%rdi)
; X64-NEXT:    movq $5, 32(%rdi)
; X64-NEXT:    movq $6, 40(%rdi)
; X64-NEXT:    movq $7, 48(%rdi)
; X64-NEXT:    movl $8, 56(%rdi)
; X64-NEXT:    movw $9, 60(%rdi)
; X64-NEXT:    movb $10, 62(%rdi)
; X64-NEXT:    retq
  %a8 = bitcast i8* %a to i64*
  %arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
  %arrayidx16 = getelementptr inbounds i64, i64* %a8, i64 2
  %arrayidx24 = getelementptr inbounds i64, i64* %a8, i64 3
  %arrayidx32 = getelementptr inbounds i64, i64* %a8, i64 4
  %arrayidx40 = getelementptr inbounds i64, i64* %a8, i64 5
  %arrayidx48 = getelementptr inbounds i64, i64* %a8, i64 6
  %a4 = bitcast i8* %a to i32*
  %arrayidx56 = getelementptr inbounds i32, i32* %a4, i64 14
  %a2 = bitcast i8* %a to i16*
  %arrayidx60 = getelementptr inbounds i16, i16* %a2, i64 30
  %arrayidx62 = getelementptr inbounds i8, i8* %a, i64 62

  store i64 1, i64* %a8
  store i64 2, i64* %arrayidx8
  store i64 3, i64* %arrayidx16
  store i64 4, i64* %arrayidx24
  store i64 5, i64* %arrayidx32
  store i64 6, i64* %arrayidx40
  store i64 7, i64* %arrayidx48
  store i32 8, i32* %arrayidx56
  store i16 9, i16* %arrayidx60
  store i8 10, i8* %arrayidx62
  ret void
}