; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s
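
; This file exercises the IR-level interleaved-access lowering enabled by
; -lower-interleaved-accesses=true in the RUN line above. A minimal sketch of
; the factor-2 load pattern, inferred from the tests below (the %even/%odd
; names are illustrative only):
;
;   %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
;   %even = shufflevector <16 x i8> %wide.vec, <16 x i8> undef,
;             <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
;   %odd  = shufflevector <16 x i8> %wide.vec, <16 x i8> undef,
;             <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
;
; A wide consecutive load feeding strided shufflevectors like these is expected
; to be selected to a single NEON vld2; the mirror-image pattern of an
; interleaving shufflevector feeding a wide store maps to vst2, and factors 3
; and 4 map to vld3/vst3 and vld4/vst4, as checked throughout this file.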

; CHECK-LABEL: load_factor2:
; CHECK: vld2.8 {d16, d17}, [r0]
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add nsw <8 x i8> %strided.v0, %strided.v1
  ret <8 x i8> %add
}

; CHECK-LABEL: load_factor3:
; CHECK: vld3.32 {d16, d17, d18}, [r0]
define <2 x i32> @load_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <6 x i32>*
  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
  %add = add nsw <2 x i32> %strided.v2, %strided.v1
  ret <2 x i32> %add
}

; CHECK-LABEL: load_factor4:
; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
define <4 x i32> @load_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; CHECK-LABEL: store_factor2:
; CHECK: vst2.8 {d16, d17}, [r0]
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
  ret void
}

; CHECK-LABEL: store_factor3:
; CHECK: vst3.32 {d16, d18, d20}, [r0]!
; CHECK: vst3.32 {d17, d19, d21}, [r0]
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; CHECK-LABEL: store_factor4:
; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; The following cases test that interleaved accesses of pointer vectors can be
; matched to vldN/vstN instructions.
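; The NEON vldN/vstN forms operate on integer and floating-point element
; sizes, so (as the CHECK lines below assume) vectors of i32* on this 32-bit
; target are expected to be handled as vectors of 32-bit elements, selecting
; the .32 variants.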

; CHECK-LABEL: load_ptrvec_factor2:
; CHECK: vld2.32 {d16, d17}, [r0]
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x i32*> %strided.v0
}

; CHECK-LABEL: load_ptrvec_factor3:
; CHECK: vld3.32 {d16, d17, d18}, [r0]
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
  ret void
}

; CHECK-LABEL: load_ptrvec_factor4:
; CHECK: vld4.32 {d16, d17, d18, d19}, [r0]
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <8 x i32*>*
  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
  ret void
}

; CHECK-LABEL: store_ptrvec_factor2:
; CHECK: vst2.32 {d16, d17}, [r0]
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
  ret void
}

; CHECK-LABEL: store_ptrvec_factor3:
; CHECK: vst3.32 {d16, d17, d18}, [r0]
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
  ret void
}

; CHECK-LABEL: store_ptrvec_factor4:
; CHECK: vst4.32 {d16, d17, d18, d19}, [r0]
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
  %base = bitcast i32* %ptr to <8 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
  ret void
}

; The following cases check that shuffle masks with undef indices can be
; matched to vldN/vstN instructions.
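; As the CHECK lines below assume, undef mask indices are not expected to
; prevent the accesses from being matched, provided the defined indices are
; consistent with a strided (interleaved) pattern.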

; CHECK-LABEL: load_undef_mask_factor2:
; CHECK: vld2.32 {d16, d17, d18, d19}, [r0]
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  %add = add nsw <4 x i32> %strided.v0, %strided.v1
  ret <4 x i32> %add
}

; CHECK-LABEL: load_undef_mask_factor3:
; CHECK: vld3.32 {d16, d18, d20}, [r0]!
; CHECK: vld3.32 {d17, d19, d21}, [r0]
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %add = add nsw <4 x i32> %strided.v2, %strided.v1
  ret <4 x i32> %add
}

; CHECK-LABEL: load_undef_mask_factor4:
; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; CHECK-LABEL: store_undef_mask_factor2:
; CHECK: vst2.32 {d16, d17, d18, d19}, [r0]
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
  ret void
}

; CHECK-LABEL: store_undef_mask_factor3:
; CHECK: vst3.32 {d16, d18, d20}, [r0]!
; CHECK: vst3.32 {d17, d19, d21}, [r0]
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; CHECK-LABEL: store_undef_mask_factor4:
; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; The following test cases check that address spaces are properly handled.

; CHECK-LABEL: load_address_space
; CHECK: vld3.32
define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
  %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
  %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
  store <2 x i32> %interleaved, <2 x i32>* %B
  ret void
}

; CHECK-LABEL: store_address_space
; CHECK: vst2.32
define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
  %tmp0 = load <2 x i32>, <2 x i32>* %A
  %tmp1 = load <2 x i32>, <2 x i32>* %B
  %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
  ret void
}