| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s |
| 2 | |
; Test f: load a <4 x i16> vector, convert it unsigned-int -> float as a
; <4 x float>, and return the scalar sum of lanes 0, 1 and 2 (lane 3 is never
; extracted). The FileCheck directives below are matched in order against the
; llc output for this function: a vld1 load, a vmovl.u16, a vcvt.f32.u32
; conversion, and a vadd.f32.
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 3 | ; CHECK-LABEL: f: |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 4 | define float @f(<4 x i16>* nocapture %in) { |
| Ahmed Bougacha | 4200cc9 | 2015-03-05 19:37:53 +0000 | [diff] [blame] | 5 | ; CHECK: vld1 |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 6 | ; CHECK: vmovl.u16 |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 7 | %1 = load <4 x i16>, <4 x i16>* %in |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 8 | ; CHECK: vcvt.f32.u32 |
| 9 | %2 = uitofp <4 x i16> %1 to <4 x float> |
| 10 | %3 = extractelement <4 x float> %2, i32 0 |
| 11 | %4 = extractelement <4 x float> %2, i32 1 |
| 12 | %5 = extractelement <4 x float> %2, i32 2 |
| 13 | |
| 14 | ; CHECK: vadd.f32 |
| 15 | %6 = fadd float %3, %4 |
| 16 | %7 = fadd float %6, %5 |
| 17 | |
| 18 | ret float %7 |
| 19 | } |
| 20 | |
; Test g: same shape as an unsigned-int -> float vector conversion followed by
; a three-lane sum, but the input is <4 x i8>. The directives expect two
; vmovl steps (vmovl.u8 then vmovl.u16) between the vld1 load and the
; vcvt.f32.u32 conversion, followed by a vadd.f32. Lane 3 of the converted
; vector is never extracted.
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 21 | ; CHECK-LABEL: g: |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 22 | define float @g(<4 x i8>* nocapture %in) { |
| Tim Northover | 3de97b7 | 2012-04-26 08:46:29 +0000 | [diff] [blame] | 23 | ; Note: vld1 here is reasonably important. Mixing VFP and NEON |
| 24 | ; instructions is bad on some cores |
| 25 | ; CHECK: vld1 |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 26 | ; CHECK: vmovl.u8 |
| 27 | ; CHECK: vmovl.u16 |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 28 | %1 = load <4 x i8>, <4 x i8>* %in |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 29 | ; CHECK: vcvt.f32.u32 |
| 30 | %2 = uitofp <4 x i8> %1 to <4 x float> |
| 31 | %3 = extractelement <4 x float> %2, i32 0 |
| 32 | %4 = extractelement <4 x float> %2, i32 1 |
| 33 | %5 = extractelement <4 x float> %2, i32 2 |
| 34 | |
| 35 | ; CHECK: vadd.f32 |
| 36 | %6 = fadd float %3, %4 |
| 37 | %7 = fadd float %6, %5 |
| 38 | |
| 39 | ret float %7 |
| 40 | } |
| 41 | |
; Test h: convert a <4 x float> argument to <4 x i8> with fptoui and return it.
; The directives expect a 32-bit vcvt from f32 followed by a vmovn.i32; the
; {{[us]}} pattern accepts either the unsigned or the signed form of the
; conversion instruction.
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 42 | ; CHECK-LABEL: h: |
| James Molloy | 1ea6473 | 2012-04-05 10:01:12 +0000 | [diff] [blame] | 43 | define <4 x i8> @h(<4 x float> %v) { |
| 44 | ; CHECK: vcvt.{{[us]}}32.f32 |
| 45 | ; CHECK: vmovn.i32 |
| 46 | %1 = fptoui <4 x float> %v to <4 x i8> |
| 47 | ret <4 x i8> %1 |
| 48 | } |
| James Molloy | a9bcf20 | 2012-04-17 08:18:00 +0000 | [diff] [blame] | 49 | |
; Test i: load a <4 x i8> vector (align 4) and return the signed division of
; an all-zero vector by it (the zero vector is the dividend, the loaded value
; the divisor). The directives expect, in order: a vld1 load, vmovl.s8 and
; vmovl.s16, then vrecpe, vrecps and vmul, and finally a vmovn.
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 50 | ; CHECK-LABEL: i: |
| James Molloy | a9bcf20 | 2012-04-17 08:18:00 +0000 | [diff] [blame] | 51 | define <4 x i8> @i(<4 x i8>* %x) { |
| Tim Northover | 3de97b7 | 2012-04-26 08:46:29 +0000 | [diff] [blame] | 52 | ; Note: vld1 here is reasonably important. Mixing VFP and NEON |
| 53 | ; instructions is bad on some cores |
| 54 | ; CHECK: vld1 |
| James Molloy | a9bcf20 | 2012-04-17 08:18:00 +0000 | [diff] [blame] | 55 | ; CHECK: vmovl.s8 |
| 56 | ; CHECK: vmovl.s16 |
| 57 | ; CHECK: vrecpe |
| 58 | ; CHECK: vrecps |
| 59 | ; CHECK: vmul |
| 60 | ; CHECK: vmovn |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 61 | %1 = load <4 x i8>, <4 x i8>* %x, align 4 |
| James Molloy | a9bcf20 | 2012-04-17 08:18:00 +0000 | [diff] [blame] | 62 | %2 = sdiv <4 x i8> zeroinitializer, %1 |
| 63 | ret <4 x i8> %2 |
| 64 | } |
; Test j: load a <4 x i8> vector (align 4) and zero-extend it to <4 x i32>.
; The directives expect a vld1 load followed by vmovl.u8 and vmovl.u16, and
; the negative check requires that no vand appears after the vmovl.u16 match.
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 65 | ; CHECK-LABEL: j: |
| Silviu Baranga | 3f40d87 | 2012-09-05 08:57:21 +0000 | [diff] [blame] | 66 | define <4 x i32> @j(<4 x i8>* %in) nounwind { |
| 67 | ; CHECK: vld1 |
| 68 | ; CHECK: vmovl.u8 |
| 69 | ; CHECK: vmovl.u16 |
| 70 | ; CHECK-NOT: vand |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 71 | %1 = load <4 x i8>, <4 x i8>* %in, align 4 |
| Silviu Baranga | 3f40d87 | 2012-09-05 08:57:21 +0000 | [diff] [blame] | 72 | %2 = zext <4 x i8> %1 to <4 x i32> |
| 73 | ret <4 x i32> %2 |
| 74 | } |
| 75 | |