; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
Hao Liu2cd34bb2015-06-26 02:45:36 +00003
; Factor-2 interleaved load of i8 elements. One <16 x i8> load is split by two
; strided shuffles into its even and odd lanes, which are then added.
; The checks require a vld2.8 when NEON is enabled and forbid any vld2 when it
; is disabled.
; NEON-LABEL: load_factor2:
; NEON: vld2.8 {d16, d17}, [r0]
; NONEON-LABEL: load_factor2:
; NONEON-NOT: vld2
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
  ; Lanes 0, 2, ..., 14: first member of every pair.
  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; Lanes 1, 3, ..., 15: second member of every pair.
  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add nsw <8 x i8> %strided.v0, %strided.v1
  ret <8 x i8> %add
}
15
; Factor-3 interleaved load of i32 elements. Only members 2 and 1 of each
; 3-element group are extracted from the <6 x i32> load; member 0 is unused.
; The checks require a vld3.32 when NEON is enabled and forbid any vld3 when
; it is disabled.
; NEON-LABEL: load_factor3:
; NEON: vld3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: load_factor3:
; NONEON-NOT: vld3
define <2 x i32> @load_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <6 x i32>*
  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
  ; Lanes 2 and 5: third member of each group.
  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
  ; Lanes 1 and 4: second member of each group.
  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
  %add = add nsw <2 x i32> %strided.v2, %strided.v1
  ret <2 x i32> %add
}
28
; Factor-4 interleaved load of i32 elements from a <16 x i32> wide vector.
; Members 0 and 2 of each 4-element group are extracted and added.
; With NEON enabled the checks expect two vld4.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vld4 may appear.
; NEON-LABEL: load_factor4:
; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: load_factor4:
; NONEON-NOT: vld4
define <4 x i32> @load_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  ; Lanes 0, 4, 8, 12: first member of each group.
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ; Lanes 2, 6, 10, 14: third member of each group.
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}
42
; Factor-2 interleaved store of i8 elements. %v0 and %v1 are zipped lane by
; lane into one <16 x i8> vector that is stored with a single wide store.
; The checks require a vst2.8 when NEON is enabled and forbid any vst2 when it
; is disabled.
; NEON-LABEL: store_factor2:
; NEON: vst2.8 {d16, d17}, [r0]
; NONEON-LABEL: store_factor2:
; NONEON-NOT: vst2
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
  ; Mask indices 0-7 select from %v0 and 8-15 from %v1, alternating.
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
  ret void
}
52
; Factor-3 interleaved store of i32 elements. %v0, %v1 and %v2 are first
; concatenated pairwise (with %v2 padded by undef lanes) and then interleaved
; into one <12 x i32> vector that is stored with a single wide store.
; With NEON enabled the checks expect two vst3.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vst3 of any element size may
; appear.
; NEON-LABEL: store_factor3:
; NEON: vst3.32 {d16, d18, d20}, [r0]!
; NEON: vst3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: store_factor3:
; NONEON-NOT: vst3
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  ; Concatenate %v0 and %v1 into lanes 0-3 and 4-7.
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Widen %v2 to eight lanes; the upper four lanes are undef padding.
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Indices 0-3 are %v0, 4-7 are %v1 and 8-11 are %v2.
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}
66
; Factor-4 interleaved store of i32 elements. %v0..%v3 are concatenated
; pairwise and then interleaved into one <16 x i32> vector that is stored with
; a single wide store.
; With NEON enabled the checks expect two vst4.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vst4 may appear.
; NEON-LABEL: store_factor4:
; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: store_factor4:
; NONEON-NOT: vst4
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  ; Concatenate %v0 with %v1, and %v2 with %v3.
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Indices 0-3 are %v0, 4-7 are %v1, 8-11 are %v2 and 12-15 are %v3.
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}
80
; The following cases test that interleaved accesses of pointer vectors can be
; matched to vldN/vstN instructions.
83
; Factor-2 interleaved load where the elements are pointers (i32*).
; Only the first member of each pair is extracted from the <4 x i32*> load.
; The checks require a vld2.32 when NEON is enabled and forbid any vld2 when
; it is disabled.
; NEON-LABEL: load_ptrvec_factor2:
; NEON: vld2.32 {d16, d17}, [r0]
; NONEON-LABEL: load_ptrvec_factor2:
; NONEON-NOT: vld2
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
  ; Lanes 0 and 2: first member of each pair.
  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x i32*> %strided.v0
}
94
; Factor-3 interleaved load where the elements are pointers (i32*).
; Members 2 and 1 of each 3-element group are extracted from the <6 x i32*>
; load and written to %ptr1 and %ptr2 respectively.
; The checks require a vld3.32 when NEON is enabled and forbid any vld3 when
; it is disabled.
; NEON-LABEL: load_ptrvec_factor3:
; NEON: vld3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: load_ptrvec_factor3:
; NONEON-NOT: vld3
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
  ; Lanes 2 and 5: third member of each group.
  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
  ; Lanes 1 and 4: second member of each group.
  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
  ret void
}
108
; Factor-4 interleaved load where the elements are pointers (i32*).
; Members 1 and 3 of each 4-element group are extracted from the <8 x i32*>
; load and written to %ptr1 and %ptr2 respectively.
; The checks require a vld4.32 when NEON is enabled and forbid any vld4 when
; it is disabled.
; NEON-LABEL: load_ptrvec_factor4:
; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: load_ptrvec_factor4:
; NONEON-NOT: vld4
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <8 x i32*>*
  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
  ; Lanes 1 and 5: second member of each group.
  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
  ; Lanes 3 and 7: fourth member of each group.
  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
  ret void
}
122
; Factor-2 interleaved store where the elements are pointers (i32*).
; %v0 and %v1 are zipped into one <4 x i32*> vector and stored.
; The checks require a vst2.32 when NEON is enabled and forbid any vst2 when
; it is disabled.
; NEON-LABEL: store_ptrvec_factor2:
; NEON: vst2.32 {d16, d17}, [r0]
; NONEON-LABEL: store_ptrvec_factor2:
; NONEON-NOT: vst2
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  ; Indices 0-1 select from %v0 and 2-3 from %v1, alternating.
  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
  ret void
}
133
; Factor-3 interleaved store where the elements are pointers (i32*).
; %v0, %v1 and %v2 are concatenated (with %v2 padded by undef lanes) and then
; interleaved into one <6 x i32*> vector that is stored.
; The checks require a vst3.32 when NEON is enabled and forbid any vst3 when
; it is disabled.
; NEON-LABEL: store_ptrvec_factor3:
; NEON: vst3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: store_ptrvec_factor3:
; NONEON-NOT: vst3
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  ; Concatenate %v0 and %v1 into lanes 0-1 and 2-3.
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Widen %v2 to four lanes; the upper two lanes are undef padding.
  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; Indices 0-1 are %v0, 2-3 are %v1 and 4-5 are %v2.
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
  ret void
}
146
; Factor-4 interleaved store where the elements are pointers (i32*).
; %v0..%v3 are concatenated pairwise and then interleaved into one
; <8 x i32*> vector that is stored.
; The checks require a vst4.32 when NEON is enabled and forbid any vst4 when
; it is disabled.
; NOTE(review): %ptr is declared i32* here, whereas the sibling pointer-vector
; tests take i32**. The bitcast below makes the two equivalent for this test,
; so the signature is left as is.
; NEON-LABEL: store_ptrvec_factor4:
; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: store_ptrvec_factor4:
; NONEON-NOT: vst4
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
  %base = bitcast i32* %ptr to <8 x i32*>*
  ; Concatenate %v0 with %v1, and %v2 with %v3.
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Indices 0-1 are %v0, 2-3 are %v1, 4-5 are %v2 and 6-7 are %v3.
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
  ret void
}
159
; The following cases check that shuffle masks with undef indices can be
; matched to vldN/vstN instructions.
162
; Factor-2 interleaved load whose shuffle masks contain undef indices.
; The defined indices (2, 6 and 3, 7) still follow a stride of 2.
; The checks require a vld2.32 when NEON is enabled and forbid any vld2 when
; it is disabled.
; NEON-LABEL: load_undef_mask_factor2:
; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: load_undef_mask_factor2:
; NONEON-NOT: vld2
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
  ; Even lanes, with positions 0 and 2 of the mask left undef.
  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  ; Odd lanes, with positions 0 and 2 of the mask left undef.
  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  %add = add nsw <4 x i32> %strided.v0, %strided.v1
  ret <4 x i32> %add
}
175
; Factor-3 interleaved load whose first shuffle mask is mostly undef: only
; its leading index (2) is defined. The second mask is fully defined.
; With NEON enabled the checks expect two vld3.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vld3 may appear.
; NEON-LABEL: load_undef_mask_factor3:
; NEON: vld3.32 {d16, d18, d20}, [r0]!
; NEON: vld3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: load_undef_mask_factor3:
; NONEON-NOT: vld3
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
  ; Third member of each group; only the first mask index is defined.
  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  ; Lanes 1, 4, 7, 10: second member of each group.
  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %add = add nsw <4 x i32> %strided.v2, %strided.v1
  ret <4 x i32> %add
}
189
; Factor-4 interleaved load whose shuffle masks have undef trailing indices.
; The defined indices (0, 4 and 2, 6) still follow a stride of 4.
; With NEON enabled the checks expect two vld4.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vld4 may appear.
; NEON-LABEL: load_undef_mask_factor4:
; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: load_undef_mask_factor4:
; NONEON-NOT: vld4
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  ; First member of each group; the last two mask indices are undef.
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
  ; Third member of each group; the last two mask indices are undef.
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}
203
; Factor-2 interleaved store whose shuffle mask has its first four indices
; undef. The defined tail (2, 6, 3, 7) still alternates between %v0 and %v1.
; The checks require a vst2.32 when NEON is enabled and forbid any vst2 when
; it is disabled.
; NEON-LABEL: store_undef_mask_factor2:
; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: store_undef_mask_factor2:
; NONEON-NOT: vst2
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
  %base = bitcast i32* %ptr to <8 x i32>*
  ; Indices 0-3 select from %v0 and 4-7 from %v1.
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
  ret void
}
214
; Factor-3 interleaved store whose final shuffle mask has two undef indices
; (positions 2 and 4); the remaining indices follow the factor-3 pattern.
; With NEON enabled the checks expect two vst3.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vst3 may appear.
; NEON-LABEL: store_undef_mask_factor3:
; NEON: vst3.32 {d16, d18, d20}, [r0]!
; NEON: vst3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: store_undef_mask_factor3:
; NONEON-NOT: vst3
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  ; Concatenate %v0 and %v1 into lanes 0-3 and 4-7.
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Widen %v2 to eight lanes; the upper four lanes are undef padding.
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Indices 0-3 are %v0, 4-7 are %v1 and 8-11 are %v2.
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}
228
; Factor-4 interleaved store whose final shuffle mask has two undef indices
; (positions 3 and 4); the remaining indices follow the factor-4 pattern.
; With NEON enabled the checks expect two vst4.32 instructions, the first with
; address writeback ("!"); with NEON disabled no vst4 may appear.
; NEON-LABEL: store_undef_mask_factor4:
; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: store_undef_mask_factor4:
; NONEON-NOT: vst4
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  ; Concatenate %v0 with %v1, and %v2 with %v3.
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Indices 0-3 are %v0, 4-7 are %v1, 8-11 are %v2 and 12-15 are %v3.
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}
Jeroen Ketemaab99b592015-09-30 10:56:37 +0000242
; The following test cases check that address spaces are properly handled.
244
; Interleaved load through a pointer in address space 1. The shuffle mask
; <0, 3> has a stride of 3 over the <4 x i32> load.
; The checks require a vld3.32 when NEON is enabled and forbid any vld3 when
; it is disabled.
; NEON-LABEL: load_address_space:
; NEON: vld3.32
; NONEON-LABEL: load_address_space:
; NONEON-NOT: vld3
define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
  %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
  ; Lanes 0 and 3 of the loaded vector.
  %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
  store <2 x i32> %interleaved, <2 x i32>* %B
  ret void
}
255
; Interleaved store through a pointer in address space 1. Two <2 x i32>
; vectors loaded from %A and %B are zipped into one <4 x i32> vector and
; stored to %C.
; The checks require a vst2.32 when NEON is enabled and forbid any vst2 when
; it is disabled.
; NEON-LABEL: store_address_space:
; NEON: vst2.32
; NONEON-LABEL: store_address_space:
; NONEON-NOT: vst2
define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
  %tmp0 = load <2 x i32>, <2 x i32>* %A
  %tmp1 = load <2 x i32>, <2 x i32>* %B
  ; Indices 0-1 select from %tmp0 and 2-3 from %tmp1, alternating.
  %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
  ret void
}