// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
// REQUIRES: arm64-registered-target
// Test ARM64 SIMD max/min intrinsics

#include <arm_neon.h>

// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit reduction
int8_t test_vmaxv_s8(int8x8_t a1) {
  // CHECK: test_vmaxv_s8
  return vmaxv_s8(a1);
  // CHECK @llvm.arm64.neon.smaxv.i32.v8i8
}

uint16_t test_vminvq_u16(uint16x8_t a1) {
  // CHECK: test_vminvq_u16
  return vminvq_u16(a1);
  // CHECK llvm.arm64.neon.uminv.i16.v8i16
}
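
// Illustrative addition (not part of the original test): a 128-bit unsigned
// 8-bit max reduction, following the same pattern as the reductions above.
// The intrinsic named in the trailing comment is an assumption based on the
// umaxv/uminv naming used elsewhere in this file and has not been verified
// against actual codegen, so it is deliberately not a FileCheck directive.
uint8_t test_vmaxvq_u8(uint8x16_t a1) {
  // CHECK: test_vmaxvq_u8
  return vmaxvq_u8(a1);
  // expected (unverified): @llvm.arm64.neon.umaxv.i32.v16i8
}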

// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit pairwise
uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
  // CHECK: test_vmin_u8
  return vmin_u8(a1, a2);
  // CHECK llvm.arm64.neon.umin.v8i8
}

uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
  // CHECK: test_vminq_u8
  return vminq_u8(a1, a2);
  // CHECK llvm.arm64.neon.umin.v16i8
}

int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
  // CHECK: test_vmaxq_s16
  return vmaxq_s16(a1, a2);
  // CHECK llvm.arm64.neon.smax.v8i16
}
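
// Illustrative addition (not part of the original test): a 64-bit signed
// 16-bit two-operand min, mirroring the unsigned and 128-bit cases above.
// The intrinsic named in the trailing comment is an assumption based on the
// smax/umin naming pattern in this file and is left as a plain comment rather
// than a FileCheck directive.
int16x4_t test_vmin_s16(int16x4_t a1, int16x4_t a2) {
  // CHECK: test_vmin_s16
  return vmin_s16(a1, a2);
  // expected (unverified): llvm.arm64.neon.smin.v4i16
}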

// Test the more complicated cases of [suf]32 and f64
float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK: test_vmaxq_f64
  return vmaxq_f64(a1, a2);
  // CHECK llvm.arm64.neon.fmax.v2f64
}

float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
  // CHECK: test_vmaxq_f32
  return vmaxq_f32(a1, a2);
  // CHECK llvm.arm64.neon.fmax.v4f32
}

float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK: test_vminq_f64
  return vminq_f64(a1, a2);
  // CHECK llvm.arm64.neon.fmin.v2f64
}

float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
  // CHECK: test_vmax_f32
  return vmax_f32(a1, a2);
  // CHECK llvm.arm64.neon.fmax.v2f32
}

int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
  // CHECK: test_vmax_s32
  return vmax_s32(a1, a2);
  // CHECK llvm.arm64.neon.smax.v2i32
}

uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
  // CHECK: test_vmin_u32
  return vmin_u32(a1, a2);
  // CHECK llvm.arm64.neon.umin.v2i32
}
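
// Illustrative addition (not part of the original test): the remaining
// two-operand f32 case, a 64-bit vector fmin. The intrinsic named in the
// trailing comment is an assumption based on the fmax/fmin naming used above
// and is left as a plain comment rather than a FileCheck directive.
float32x2_t test_vmin_f32(float32x2_t a1, float32x2_t a2) {
  // CHECK: test_vmin_f32
  return vmin_f32(a1, a2);
  // expected (unverified): llvm.arm64.neon.fmin.v2f32
}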

float32_t test_vmaxnmv_f32(float32x2_t a1) {
  // CHECK: test_vmaxnmv_f32
  return vmaxnmv_f32(a1);
  // CHECK: llvm.arm64.neon.fmaxnmv.f32.v2f32
  // CHECK-NEXT: ret
}

// this doesn't translate into a valid instruction, regardless of what the
// ARM doc says.
#if 0
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxnmvq_f64
  return vmaxnmvq_f64(a1);
  // CHECK@ llvm.arm64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vmaxnmvq_f32(float32x4_t a1) {
  // CHECK: test_vmaxnmvq_f32
  return vmaxnmvq_f32(a1);
  // CHECK: llvm.arm64.neon.fmaxnmv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vmaxv_f32(float32x2_t a1) {
  // CHECK: test_vmaxv_f32
  return vmaxv_f32(a1);
  // CHECK: llvm.arm64.neon.fmaxv.f32.v2f32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: fmaxp.2s
  // CHECK-NEXT: ret
}

int32_t test_vmaxv_s32(int32x2_t a1) {
  // CHECK: test_vmaxv_s32
  return vmaxv_s32(a1);
  // CHECK: llvm.arm64.neon.smaxv.i32.v2i32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: smaxp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vmaxv_u32(uint32x2_t a1) {
  // CHECK: test_vmaxv_u32
  return vmaxv_u32(a1);
  // CHECK: llvm.arm64.neon.umaxv.i32.v2i32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: umaxp.2s
  // CHECK-NEXT: ret
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vmaxvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxvq_f64
  return vmaxvq_f64(a1);
  // CHECK@ llvm.arm64.neon.fmaxv.i64.v2f64
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vmaxvq_f32(float32x4_t a1) {
  // CHECK: test_vmaxvq_f32
  return vmaxvq_f32(a1);
  // CHECK: llvm.arm64.neon.fmaxv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vminnmv_f32(float32x2_t a1) {
  // CHECK: test_vminnmv_f32
  return vminnmv_f32(a1);
  // CHECK: llvm.arm64.neon.fminnmv.f32.v2f32
  // CHECK-NEXT: ret
}

float32_t test_vminvq_f32(float32x4_t a1) {
  // CHECK: test_vminvq_f32
  return vminvq_f32(a1);
  // CHECK: llvm.arm64.neon.fminv.f32.v4f32
  // CHECK-NEXT: ret
}

// this doesn't translate into a valid instruction, regardless of what the ARM
// doc says.
#if 0
float64_t test_vminnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vminnmvq_f64
  return vminnmvq_f64(a1);
  // CHECK@ llvm.arm64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vminnmvq_f32(float32x4_t a1) {
  // CHECK: test_vminnmvq_f32
  return vminnmvq_f32(a1);
  // CHECK: llvm.arm64.neon.fminnmv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vminv_f32(float32x2_t a1) {
  // CHECK: test_vminv_f32
  return vminv_f32(a1);
  // CHECK: llvm.arm64.neon.fminv.f32.v2f32
  // CHECK-NEXT: ret
}

int32_t test_vminv_s32(int32x2_t a1) {
  // CHECK: test_vminv_s32
  return vminv_s32(a1);
  // CHECK: llvm.arm64.neon.sminv.i32.v2i32
  // CHECK-CODEGEN: sminp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vminv_u32(uint32x2_t a1) {
  // CHECK: test_vminv_u32
  return vminv_u32(a1);
  // CHECK: llvm.arm64.neon.uminv.i32.v2i32
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vminvq_f64(float64x2_t a1) {
  // CHECK@ test_vminvq_f64
  return vminvq_f64(a1);
  // CHECK@ llvm.arm64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif