; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
|  | 2 |  | 
; @f: load a <4 x i16> vector, convert it to <4 x float> with uitofp, and
; return the sum of lanes 0, 1 and 2 (lane 3 is never read).
; The directives below are matched, in this order, against the llc output.
; CHECK: f:
; CHECK: vldr
; CHECK: vmovl.u16
; CHECK: vcvt.f32.u32
; CHECK: vadd.f32
define float @f(<4 x i16>* nocapture %in) {
  %vec = load <4 x i16>* %in
  %fvec = uitofp <4 x i16> %vec to <4 x float>
  %lane0 = extractelement <4 x float> %fvec, i32 0
  %lane1 = extractelement <4 x float> %fvec, i32 1
  %lane2 = extractelement <4 x float> %fvec, i32 2
  %sum01 = fadd float %lane0, %lane1
  %sum012 = fadd float %sum01, %lane2
  ret float %sum012
}
|  | 20 |  | 
; @g: same shape as a three-lane float sum, but the source vector is
; <4 x i8>: load it, convert with uitofp to <4 x float>, and return the sum
; of lanes 0, 1 and 2.
;
; Note: expecting vld1 for the load is reasonably important here, because
; mixing VFP and NEON instructions is bad on some cores.
; CHECK: g:
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; CHECK: vcvt.f32.u32
; CHECK: vadd.f32
define float @g(<4 x i8>* nocapture %in) {
  %bytes = load <4 x i8>* %in
  %fvec = uitofp <4 x i8> %bytes to <4 x float>
  %lane0 = extractelement <4 x float> %fvec, i32 0
  %lane1 = extractelement <4 x float> %fvec, i32 1
  %lane2 = extractelement <4 x float> %fvec, i32 2
  %sum01 = fadd float %lane0, %lane1
  %sum012 = fadd float %sum01, %lane2
  ret float %sum012
}
|  | 41 |  | 
; @h: convert a <4 x float> argument to <4 x i8> with fptoui and return it.
; The conversion directive accepts either the signed or the unsigned 32-bit
; form of vcvt, and a narrowing vmovn.i32 is expected after it.
; CHECK: h:
; CHECK: vcvt.{{[us]}}32.f32
; CHECK: vmovn.i32
define <4 x i8> @h(<4 x float> %v) {
  %narrow = fptoui <4 x float> %v to <4 x i8>
  ret <4 x i8> %narrow
}
| James Molloy | a9bcf20 | 2012-04-17 08:18:00 +0000 | [diff] [blame] | 49 |  | 
; @i: load a <4 x i8> vector (align 4) and return the signed division of an
; all-zero vector by it. The expected output widens the operand with
; vmovl.s8 / vmovl.s16, uses vrecpe, vrecps and vmul, and narrows the result
; with vmovn.
;
; Note: expecting vld1 for the load is reasonably important here, because
; mixing VFP and NEON instructions is bad on some cores.
; CHECK: i:
; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe
; CHECK: vrecps
; CHECK: vmul
; CHECK: vmovn
define <4 x i8> @i(<4 x i8>* %x) {
  %divisor = load <4 x i8>* %x, align 4
  %quot = sdiv <4 x i8> zeroinitializer, %divisor
  ret <4 x i8> %quot
}
; @j: load a <4 x i8> vector (align 4) and zero-extend it to <4 x i32>.
; The expected output is a vld1 followed by the two widening moves, and no
; vand may appear after them.
; CHECK: j:
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; CHECK-NOT: vand
define <4 x i32> @j(<4 x i8>* %in) nounwind {
  %bytes = load <4 x i8>* %in, align 4
  %wide = zext <4 x i8> %bytes to <4 x i32>
  ret <4 x i32> %wide
}
|  | 75 |  |