blob: ebbce70409c5f1dc032c47e6e0418e5a1583ffd2 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE
13
; load_swap00: loads two <2 x i64> vectors and returns the elements of %v1
; (the first shuffle operand) in reversed order (mask <1, 0>); %v2 is loaded
; but never selected by the mask.
; Expected lowering per the autogenerated checks: a lone lxvd2x for the
; pwr8/powerpc64le run, and a load followed by xxswapd for the other three runs.
define <2 x i64> @load_swap00(<2 x i64>* %vp1, <2 x i64>* %vp2) {
; CHECK-P8-LABEL: load_swap00:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: lxvd2x v2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap00:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv v2, 0(r3)
; CHECK-P9-NEXT: xxswapd v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap00:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r3
; CHECK-P8-BE-NEXT: xxswapd v2, v2
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap00:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
; CHECK-P9-BE-NEXT: xxswapd v2, v2
; CHECK-P9-BE-NEXT: blr
  %v1 = load <2 x i64>, <2 x i64>* %vp1
  %v2 = load <2 x i64>, <2 x i64>* %vp2
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %v3
}
42
; load_swap01: same as load_swap00 but the mask <3, 2> selects the reversed
; elements of %v2 (the second shuffle operand, loaded from %vp2); %v1 is loaded
; but never selected.
; Expected lowering per the autogenerated checks: a lone lxvd2x from r4 for the
; pwr8/powerpc64le run, and a load followed by xxswapd for the other three runs.
define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) {
; CHECK-P8-LABEL: load_swap01:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap01:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv v2, 0(r4)
; CHECK-P9-NEXT: xxswapd v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap01:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r4
; CHECK-P8-BE-NEXT: xxswapd v2, v2
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap01:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
; CHECK-P9-BE-NEXT: xxswapd v2, v2
; CHECK-P9-BE-NEXT: blr
  %v1 = load <2 x i64>, <2 x i64>* %vp1
  %v2 = load <2 x i64>, <2 x i64>* %vp2
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %v3
}
71
; load_swap10: loads two <4 x i32> vectors and returns the elements of %v1 in
; reversed order (mask <3, 2, 1, 0>); %v2 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI2_0) and use vperm.
define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap10:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap10:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv v2, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap10:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap10:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <4 x i32>, <4 x i32>* %vp1
  %v2 = load <4 x i32>, <4 x i32>* %vp2
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %v3
}
113
; load_swap11: loads two <4 x i32> vectors and returns the elements of %v2 in
; reversed order (mask <7, 6, 5, 4>); %v1 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI3_0) and use vperm.
define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap11:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap11:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P9-NEXT: lxv v2, 0(r4)
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap11:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap11:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <4 x i32>, <4 x i32>* %vp1
  %v2 = load <4 x i32>, <4 x i32>* %vp2
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  ret <4 x i32> %v3
}
155
; load_swap20: loads two <8 x i16> vectors and returns the elements of %v1 in
; reversed order (mask <7 .. 0>); %v2 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI4_0) and use vperm.
define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2){
; CHECK-P8-LABEL: load_swap20:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap20:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv v2, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap20:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap20:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <8 x i16>, <8 x i16>* %vp1
  %v2 = load <8 x i16>, <8 x i16>* %vp2
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %v3
}
197
; load_swap21: loads two <8 x i16> vectors and returns the elements of %v2 in
; reversed order (mask <15 .. 8>); %v1 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI5_0) and use vperm.
define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){
; CHECK-P8-LABEL: load_swap21:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap21:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P9-NEXT: lxv v2, 0(r4)
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap21:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap21:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <8 x i16>, <8 x i16>* %vp1
  %v2 = load <8 x i16>, <8 x i16>* %vp2
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <8 x i16> %v3
}
239
; load_swap30: loads two <16 x i8> vectors and returns the bytes of %v1 in
; reversed order (mask <15 .. 0>); %v2 is loaded but never selected.
; Expected lowering per the autogenerated checks: both pwr9 runs use lxv
; followed by xxbrq, while both pwr8 runs load a permute control vector from
; the constant pool (.LCPI6_0) and use vperm.
define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2){
; CHECK-P8-LABEL: load_swap30:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap30:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: xxbrq v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap30:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap30:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
; CHECK-P9-BE-NEXT: xxbrq v2, vs0
; CHECK-P9-BE-NEXT: blr
  %v1 = load <16 x i8>, <16 x i8>* %vp1
  %v2 = load <16 x i8>, <16 x i8>* %vp2
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %v3
}
275
; load_swap31: loads two <16 x i8> vectors and returns the bytes of %v2 in
; reversed order (mask <31 .. 16>); %v1 is loaded but never selected.
; Expected lowering per the autogenerated checks: both pwr9 runs use lxv
; followed by xxbrq, while both pwr8 runs load a permute control vector from
; the constant pool (.LCPI7_0) and use vperm.
define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2){
; CHECK-P8-LABEL: load_swap31:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap31:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv vs0, 0(r4)
; CHECK-P9-NEXT: xxbrq v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap31:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap31:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
; CHECK-P9-BE-NEXT: xxbrq v2, vs0
; CHECK-P9-BE-NEXT: blr
  %v1 = load <16 x i8>, <16 x i8>* %vp1
  %v2 = load <16 x i8>, <16 x i8>* %vp2
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
  ret <16 x i8> %v3
}
311
; load_swap40: loads two <2 x double> vectors and returns the elements of %v2
; (the second shuffle operand) in reversed order (mask <3, 2>); %v1 is loaded
; but never selected.
; Expected lowering per the autogenerated checks: a lone lxvd2x from r4 for the
; pwr8/powerpc64le run, and a load followed by xxswapd for the other three runs.
define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) {
; CHECK-P8-LABEL: load_swap40:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap40:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv vs0, 0(r4)
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap40:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-BE-NEXT: xxswapd v2, vs0
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap40:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
; CHECK-P9-BE-NEXT: xxswapd v2, vs0
; CHECK-P9-BE-NEXT: blr
  %v1 = load <2 x double>, <2 x double>* %vp1
  %v2 = load <2 x double>, <2 x double>* %vp2
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %v3
}
340
; load_swap50: loads two <4 x float> vectors and returns the elements of %v1 in
; reversed order (mask <3, 2, 1, 0>); %v2 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI9_0) and use vperm.
define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap50:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap50:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lxv v2, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap50:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI9_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI9_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap50:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI9_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <4 x float>, <4 x float>* %vp1
  %v2 = load <4 x float>, <4 x float>* %vp2
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %v3
}
382
; load_swap51: loads two <4 x float> vectors and returns the elements of %v2 in
; reversed order (mask <7, 6, 5, 4>); %v1 is loaded but never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI10_0) and use vperm.
define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap51:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap51:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
; CHECK-P9-NEXT: lxv v2, 0(r4)
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap51:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: load_swap51:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: blr
  %v1 = load <4 x float>, <4 x float>* %vp1
  %v2 = load <4 x float>, <4 x float>* %vp2
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  ret <4 x float> %v3
}
424
; swap_store00: reverses the two elements of the <2 x i64> argument %v1
; (mask <1, 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: a lone stxvd2x for the
; pwr8/powerpc64le run, and xxswapd followed by a store for the other three runs.
define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
; CHECK-P8-LABEL: swap_store00:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: stxvd2x v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store00:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store00:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: xxswapd vs0, v2
; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store00:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxswapd vs0, v2
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
  store <2 x i64> %v3, <2 x i64>* %vp
  ret void
}
452
; swap_store01: reverses the two elements of the <2 x i64> argument %v2
; (mask <3, 2>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: a lone stxvd2x of v3 for the
; pwr8/powerpc64le run, and xxswapd followed by a store for the other three runs.
define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
; CHECK-P8-LABEL: swap_store01:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: stxvd2x v3, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store01:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxswapd vs0, v3
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store01:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: xxswapd vs0, v3
; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store01:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxswapd vs0, v3
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
  store <2 x i64> %v3, <2 x i64>* %vp
  ret void
}
480
; swap_store10: reverses the four elements of the <4 x i32> argument %v1
; (mask <3, 2, 1, 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI13_0), vperm, then store.
define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store10:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store10:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store10:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store10:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %v3, <4 x i32>* %vp
  ret void
}
521
; swap_store11: reverses the four elements of the <4 x i32> argument %v2
; (mask <7, 6, 5, 4>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI14_0), vperm, then store.
define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store11:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store11:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store11:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store11:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  store <4 x i32> %v3, <4 x i32>* %vp
  ret void
}
562
; swap_store20: reverses the eight elements of the <8 x i16> argument %v1
; (mask <7 .. 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI15_0), vperm, then store.
define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
; CHECK-P8-LABEL: swap_store20:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store20:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store20:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store20:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %v3, <8 x i16>* %vp
  ret void
}
603
; swap_store21: reverses the eight elements of the <8 x i16> argument %v2
; (mask <15 .. 8>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI16_0), vperm, then store.
define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
; CHECK-P8-LABEL: swap_store21:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store21:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store21:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store21:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  store <8 x i16> %v3, <8 x i16>* %vp
  ret void
}
644
; swap_store30: reverses the sixteen bytes of the <16 x i8> argument %v1
; (mask <15 .. 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: both pwr9 runs use xxbrq
; followed by stxv, while both pwr8 runs load a permute control vector from the
; constant pool (.LCPI17_0), vperm, then store.
define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
; CHECK-P8-LABEL: swap_store30:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store30:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxbrq vs0, v2
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store30:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI17_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store30:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxbrq vs0, v2
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %v3, <16 x i8>* %vp
  ret void
}
679
; swap_store31: reverses the sixteen bytes of the <16 x i8> argument %v2
; (mask <31 .. 16>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: both pwr9 runs use xxbrq
; followed by stxv, while both pwr8 runs load a permute control vector from the
; constant pool (.LCPI18_0), vperm, then store.
define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
; CHECK-P8-LABEL: swap_store31:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI18_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store31:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxbrq vs0, v3
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store31:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI18_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store31:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxbrq vs0, v3
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
  store <16 x i8> %v3, <16 x i8>* %vp
  ret void
}
714
; swap_store40: reverses the two elements of the <2 x double> argument %v1
; (mask <1, 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: a lone stxvd2x for the
; pwr8/powerpc64le run, and xxswapd followed by a store for the other three runs.
define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
; CHECK-P8-LABEL: swap_store40:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: stxvd2x v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store40:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store40:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: xxswapd vs0, v2
; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store40:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxswapd vs0, v2
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
  store <2 x double> %v3, <2 x double>* %vp
  ret void
}
742
; swap_store41: reverses the two elements of the <2 x double> argument %v2
; (mask <3, 2>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: a lone stxvd2x of v3 for the
; pwr8/powerpc64le run, and xxswapd followed by a store for the other three runs.
define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
; CHECK-P8-LABEL: swap_store41:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: stxvd2x v3, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store41:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: xxswapd vs0, v3
; CHECK-P9-NEXT: stxv vs0, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store41:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: xxswapd vs0, v3
; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store41:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: xxswapd vs0, v3
; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
  store <2 x double> %v3, <2 x double>* %vp
  ret void
}
770
; swap_store50: reverses the four elements of the <4 x float> argument %v1
; (mask <3, 2, 1, 0>) and stores the result to %vp; %v2 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI21_0), vperm, then store.
define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store50:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store50:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI21_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store50:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store50:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %v3, <4 x float>* %vp
  ret void
}
811
; swap_store51: reverses the four elements of the <4 x float> argument %v2
; (mask <7, 6, 5, 4>) and stores the result to %vp; %v1 is never selected.
; Expected lowering per the autogenerated checks: all four runs load a permute
; control vector from the constant pool (.LCPI22_0), vperm, then store.
define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store51:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: stvx v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store51:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI22_0@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-NEXT: stxv v2, 0(r7)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store51:
; CHECK-P8-BE: # %bb.0:
; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT: blr
;
; CHECK-P9-BE-LABEL: swap_store51:
; CHECK-P9-BE: # %bb.0:
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
; CHECK-P9-BE-NEXT: blr
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  store <4 x float> %v3, <4 x float>* %vp
  ret void
}