; Test vector intrinsics added with z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

; Bit-permute, multiply-sum, and length-controlled load/store intrinsics.
declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)

; <4 x float> compare/test intrinsics returning a {vector, i32} pair (the
; tests below treat the i32 member as the condition code), plus VFISB.
declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)

; Min/max intrinsics for <2 x double> and <4 x float>; the trailing i32 is
; emitted as the final immediate operand in the tests below.
declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)

; VBPERM.
; Both <16 x i8> arguments are passed straight to the intrinsic; the expected
; output is a single vbperm on the incoming argument registers.
define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vbperm:
; CHECK: vbperm %v24, %v24, %v26
; CHECK: br %r14
  %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
  ret <2 x i64> %res
}

; VMSLG with no shifts.
; The trailing i32 operand (0) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg1:
; CHECK: vmslg %v24, %v24, %v26, %v28, 0
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
  ret <16 x i8> %res
}

; VMSLG with both shifts.
; The trailing i32 operand (12) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg2:
; CHECK: vmslg %v24, %v24, %v26, %v28, 12
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
  ret <16 x i8> %res
}

; VLRLR with the lowest in-range displacement.
; A non-constant %length makes llvm.s390.vlrl select the register-length form;
; %ptr is used directly as the base with displacement 0.
define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vlrlr1:
; CHECK: vlrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %res
}

; VLRLR with the highest in-range displacement.
; The constant offset 4095 is expected to be folded into the displacement
; field instead of being added to the base separately.
define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr2:
; CHECK: vlrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4095
  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %res
}

; VLRLR with an out-of-range displacement.
; 4096 is one past the highest in-range displacement (4095), so the expected
; output uses displacement 0 off a separately computed base register.
define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr3:
; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4096
  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %res
}

; Check that VLRLR doesn't allow an index.
; With a variable %index the address is expected to be formed in a single base
; register, leaving displacement 0 and no index register in the operand.
define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vlrlr4:
; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 %index
  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %res
}

; VLRL with the lowest in-range displacement.
; A constant length (0) makes llvm.s390.vlrl select the immediate-length form,
; with the length emitted as the final operand.
define <16 x i8> @test_vlrl1(i8 *%ptr) {
; CHECK-LABEL: test_vlrl1:
; CHECK: vlrl %v24, 0(%r2), 0
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %res
}

; VLRL with the highest in-range displacement.
; The constant offset 4095 is expected to be folded into the displacement
; field instead of being added to the base separately.
define <16 x i8> @test_vlrl2(i8 *%base) {
; CHECK-LABEL: test_vlrl2:
; CHECK: vlrl %v24, 4095(%r2), 0
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4095
  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %res
}

; VLRL with an out-of-range displacement.
; 4096 is one past the highest in-range displacement (4095), so the expected
; output uses displacement 0 off a separately computed base register.
define <16 x i8> @test_vlrl3(i8 *%base) {
; CHECK-LABEL: test_vlrl3:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4096
  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %res
}

; Check that VLRL doesn't allow an index.
; With a variable %index the address is expected to be formed in a single base
; register, leaving displacement 0 and no index register in the operand.
define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
; CHECK-LABEL: test_vlrl4:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 %index
  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %res
}

; VSTRLR with the lowest in-range displacement.
; A non-constant %length makes llvm.s390.vstrl select the register-length
; form; %ptr is used directly as the base with displacement 0.
define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vstrlr1:
; CHECK: vstrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; VSTRLR with the highest in-range displacement.
; The constant offset 4095 is expected to be folded into the displacement
; field instead of being added to the base separately.
define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr2:
; CHECK: vstrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; VSTRLR with an out-of-range displacement.
; 4096 is one past the highest in-range displacement (4095), so the expected
; output uses displacement 0 off a separately computed base register.
define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr3:
; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; Check that VSTRLR doesn't allow an index.
; With a variable %index the address is expected to be formed in a single base
; register, leaving displacement 0 and no index register in the operand.
define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vstrlr4:
; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; VSTRL with the lowest in-range displacement.
; A constant length (8) makes llvm.s390.vstrl select the immediate-length
; form, with the length emitted as the final operand.
define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
; CHECK-LABEL: test_vstrl1:
; CHECK: vstrl %v24, 0(%r2), 8
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; VSTRL with the highest in-range displacement.
; The constant offset 4095 is expected to be folded into the displacement
; field instead of being added to the base separately.
define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl2:
; CHECK: vstrl %v24, 4095(%r2), 8
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; VSTRL with an out-of-range displacement.
; 4096 is one past the highest in-range displacement (4095), so the expected
; output uses displacement 0 off a separately computed base register.
define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl3:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; Check that VSTRL doesn't allow an index.
; With a variable %index the address is expected to be formed in a single base
; register, leaving displacement 0 and no index register in the operand.
define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
; CHECK-LABEL: test_vstrl4:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %ptr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; VFCESBS with no processing of the result.
; Only the i32 member of the returned pair is used, so the expected code
; materializes it in %r2 with ipm followed by a right shift of 28.
define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  ret i32 %res
}

; VFCESBS, returning 1 if any elements are equal (CC != 3).
; The expected code loads 0 into %r2 and then conditionally replaces it with 1
; (lhi + lochile).
define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs_any_bool:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochile %r2, 1
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp ne i32 %res, 3
  %ext = zext i1 %cmp to i32
  ret i32 %ext
}

; VFCESBS, storing to %ptr if any elements are equal.
; The vector result is returned and the i32 member (CC <= 2) gates the store.
; No text matching %r may appear before the compare, and the compare must be
; followed immediately by a conditional return.
define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b,
                                         i32 *%ptr) {
; CHECK-LABEL: test_vfcesbs_any_store:
; CHECK-NOT: %r
; CHECK: vfcesbs %v24, %v24, %v26
; CHECK-NEXT: {{bor|bnler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 0
  %cc = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp ule i32 %cc, 2
  br i1 %cmp, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %res
}

; VFCHSBS with no processing of the result.
; Only the i32 member of the returned pair is used, so the expected code
; materializes it in %r2 with ipm followed by a right shift of 28.
define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  ret i32 %res
}

; VFCHSBS, returning 1 if not all elements are higher.
; The IR tests the i32 member with "sge 1"; the expected code loads 0 into %r2
; and then conditionally replaces it with 1 (lhi + lochinhe).
define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs_notall_bool:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochinhe %r2, 1
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp sge i32 %res, 1
  %ext = zext i1 %cmp to i32
  ret i32 %ext
}

; VFCHSBS, storing to %ptr if not all elements are higher.
; The vector result is returned and the i32 member (CC > 0) gates the store.
; No text matching %r may appear before the compare, and the compare must be
; followed immediately by a conditional return.
define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b,
                                            i32 *%ptr) {
; CHECK-LABEL: test_vfchsbs_notall_store:
; CHECK-NOT: %r
; CHECK: vfchsbs %v24, %v24, %v26
; CHECK-NEXT: {{bher|ber}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
                                                   <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 0
  %cc = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp ugt i32 %cc, 0
  br i1 %cmp, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %res
}

; VFCHESBS with no processing of the result.
; Only the i32 member of the returned pair is used, so the expected code
; materializes it in %r2 with ipm followed by a right shift of 28.
define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
                                                    <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  ret i32 %res
}

; VFCHESBS, returning 1 if no element is higher or equal (CC == 3).
; The expected code loads 0 into %r2 and then conditionally replaces it with 1
; (lhi + lochio).
define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs_none_bool:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochio %r2, 1
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
                                                    <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp eq i32 %res, 3
  %ext = zext i1 %cmp to i32
  ret i32 %ext
}

; VFCHESBS, storing to %ptr if no element is higher or equal.
; The vector result is returned and the i32 member (CC >= 3) gates the store.
; No text matching %r may appear before the compare, and the compare must be
; followed immediately by a conditional return.
define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b,
                                           i32 *%ptr) {
; CHECK-LABEL: test_vfchesbs_none_store:
; CHECK-NOT: %r
; CHECK: vfchesbs %v24, %v24, %v26
; CHECK-NEXT: {{bnor|bler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
                                                    <4 x float> %b)
  %res = extractvalue {<4 x i32>, i32} %call, 0
  %cc = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp uge i32 %cc, 3
  br i1 %cmp, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %res
}

; VFTCISB with the lowest useful class selector and no processing of the result.
; The selector (1) is expected as the final immediate; the i32 member of the
; returned pair is materialized in %r2 with ipm followed by a right shift of 28.
define i32 @test_vftcisb(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  ret i32 %res
}

; VFTCISB with the highest useful class selector, returning 1 if all elements
; have the right class (CC == 0).
; The selector (4094) is expected as the final immediate; the boolean is built
; by loading 0 into %r2 and conditionally replacing it with 1 (lhi + lochie).
define i32 @test_vftcisb_all_bool(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb_all_bool:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
; CHECK: lhi %r2, 0
; CHECK: lochie %r2, 1
; CHECK: br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
  %res = extractvalue {<4 x i32>, i32} %call, 1
  %cmp = icmp eq i32 %res, 0
  %ext = zext i1 %cmp to i32
  ret i32 %ext
}

; VFISB with a rounding mode not usable via standard intrinsics.
; The two i32 operands (0, 4) are expected to appear unchanged, in order, as
; the trailing immediates of the instruction.
define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_0_4:
; CHECK: vfisb %v24, %v24, 0, 4
; CHECK: br %r14
  %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
  ret <4 x float> %res
}

; VFISB with IEEE-inexact exception suppressed.
; The two i32 operands (4, 0) are expected to appear unchanged, in order, as
; the trailing immediates of the instruction.
define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_4_0:
; CHECK: vfisb %v24, %v24, 4, 0
; CHECK: br %r14
  %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
  ret <4 x float> %res
}

; VFMAXDB.
; The trailing i32 operand (4) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmaxdb:
; CHECK: vfmaxdb %v24, %v24, %v26, 4
; CHECK: br %r14
  %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %res
}

; VFMINDB.
; The trailing i32 operand (4) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmindb:
; CHECK: vfmindb %v24, %v24, %v26, 4
; CHECK: br %r14
  %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %res
}

; VFMAXSB.
; The trailing i32 operand (4) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfmaxsb:
; CHECK: vfmaxsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %res
}

; VFMINSB.
; The trailing i32 operand (4) is expected to appear unchanged as the last
; immediate of the emitted instruction.
define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfminsb:
; CHECK: vfminsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %res
}
