; RUN: llc -mtriple armv7 %s -o - | FileCheck %s

; Test f: load a <4 x i16> vector, convert it to <4 x float> with uitofp and
; add lanes 0, 1 and 2. The directives below require, in order, a vld1 load,
; a vmovl.u16 widening step, a vcvt.f32.u32 conversion and a vadd.f32.
; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
  ; CHECK: vld1
  ; CHECK: vmovl.u16
  %1 = load <4 x i16>, <4 x i16>* %in
  ; CHECK: vcvt.f32.u32
  %2 = uitofp <4 x i16> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  ; CHECK: vadd.f32
  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}

; Test g: same shape as a <4 x i16> conversion test, but the source vector is
; <4 x i8>. The directives below require, in order, a vld1 load, a vmovl.u8
; step followed by a vmovl.u16 step, a vcvt.f32.u32 conversion and a vadd.f32.
; CHECK-LABEL: g:
define float @g(<4 x i8>* nocapture %in) {
; Note: vld1 here is reasonably important. Mixing VFP and NEON
; instructions is bad on some cores
  ; CHECK: vld1
  ; CHECK: vmovl.u8
  ; CHECK: vmovl.u16
  %1 = load <4 x i8>, <4 x i8>* %in
  ; CHECK: vcvt.f32.u32
  %2 = uitofp <4 x i8> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  ; CHECK: vadd.f32
  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}

; Test h: convert <4 x float> to <4 x i8> with fptoui. The directives below
; require a 32-bit vcvt from f32 (either signedness is accepted by the
; pattern) followed by a vmovn.i32 narrowing step.
; CHECK-LABEL: h:
define <4 x i8> @h(<4 x float> %v) {
  ; CHECK: vcvt.{{[us]}}32.f32
  ; CHECK: vmovn.i32
  %1 = fptoui <4 x float> %v to <4 x i8>
  ret <4 x i8> %1
}

; Test i: signed division of a zero vector by a loaded <4 x i8> vector. The
; directives below require, in order, a vld1 load, vmovl.s8 and vmovl.s16
; widening steps, a vrecpe/vrecps/vmul sequence and a final vmovn.
; CHECK-LABEL: i:
define <4 x i8> @i(<4 x i8>* %x) {
; Note: vld1 here is reasonably important. Mixing VFP and NEON
; instructions is bad on some cores
  ; CHECK: vld1
  ; CHECK: vmovl.s8
  ; CHECK: vmovl.s16
  ; CHECK: vrecpe
  ; CHECK: vrecps
  ; CHECK: vmul
  ; CHECK: vmovn
  %1 = load <4 x i8>, <4 x i8>* %x, align 4
  %2 = sdiv <4 x i8> zeroinitializer, %1
  ret <4 x i8> %2
}
; Test j: zero-extend a loaded <4 x i8> vector to <4 x i32>. The directives
; below require a vld1 load followed by vmovl.u8 and vmovl.u16, and forbid
; any vand after those matches.
; CHECK-LABEL: j:
define <4 x i32> @j(<4 x i8>* %in) nounwind {
  ; CHECK: vld1
  ; CHECK: vmovl.u8
  ; CHECK: vmovl.u16
  ; CHECK-NOT: vand
  %1 = load <4 x i8>, <4 x i8>* %in, align 4
  %2 = zext <4 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}
