| Sjoerd Meijer | 590e4e8 | 2018-08-01 14:43:59 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s | 
|  | 2 |  | 
|  | 3 | %struct.float16x4x2_t = type { [2 x <4 x half>] } | 
|  | 4 | %struct.float16x8x2_t = type { [2 x <8 x half>] } | 
|  | 5 |  | 
; Unary fp16 ops and compare-against-zero tests.
; Fixed: "; CHECKLABEL:" is not a FileCheck directive (the label was never
; anchored, so the instruction checks could match anywhere in the output);
; corrected to "; CHECK-LABEL:" throughout.

define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
; CHECK-LABEL: test_vabs_f16:
; CHECK:         vabs.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  ret <4 x half> %vabs1.i
}

define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vabsq_f16:
; CHECK:         vabs.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}

define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vceqz_f16:
; CHECK:         vceq.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <4 x half> %a, zeroinitializer
  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vceqz.i
}

define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vceqzq_f16:
; CHECK:         vceq.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <8 x half> %a, zeroinitializer
  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vceqz.i
}

define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgez_f16:
; CHECK:         vcge.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <4 x half> %a, zeroinitializer
  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgez.i
}

define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgezq_f16:
; CHECK:         vcge.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <8 x half> %a, zeroinitializer
  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgez.i
}

define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgtz_f16:
; CHECK:         vcgt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <4 x half> %a, zeroinitializer
  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgtz.i
}

define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgtzq_f16:
; CHECK:         vcgt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <8 x half> %a, zeroinitializer
  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgtz.i
}

define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vclez_f16:
; CHECK:         vcle.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <4 x half> %a, zeroinitializer
  %vclez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vclez.i
}

define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vclezq_f16:
; CHECK:         vcle.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <8 x half> %a, zeroinitializer
  %vclez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vclez.i
}

define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcltz_f16:
; CHECK:         vclt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <4 x half> %a, zeroinitializer
  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcltz.i
}

define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcltzq_f16:
; CHECK:         vclt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <8 x half> %a, zeroinitializer
  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcltz.i
}
|  | 123 |  | 
|  | 124 | ; FIXME (PR38404) | 
|  | 125 | ; | 
|  | 126 | ;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) { | 
|  | 127 | ;entry: | 
|  | 128 | ;  %vcvt.i = sitofp <4 x i16> %a to <4 x half> | 
|  | 129 | ;  ret <4 x half> %vcvt.i | 
|  | 130 | ;} | 
|  | 131 | ; | 
|  | 132 | ;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) { | 
|  | 133 | ;entry: | 
|  | 134 | ;  %vcvt.i = sitofp <8 x i16> %a to <8 x half> | 
|  | 135 | ;  ret <8 x half> %vcvt.i | 
|  | 136 | ;} | 
|  | 137 |  | 
|  | 138 | ;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) { | 
|  | 139 | ;entry: | 
|  | 140 | ;  %vcvt.i = uitofp <4 x i16> %a to <4 x half> | 
|  | 141 | ;  ret <4 x half> %vcvt.i | 
|  | 142 | ;} | 
|  | 143 |  | 
|  | 144 | ;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) { | 
|  | 145 | ;entry: | 
|  | 146 | ;  %vcvt.i = uitofp <8 x i16> %a to <8 x half> | 
|  | 147 | ;  ret <8 x half> %vcvt.i | 
|  | 148 | ;} | 
|  | 149 |  | 
|  | 150 | ;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) { | 
|  | 151 | ;entry: | 
|  | 152 | ;  %vcvt.i = fptosi <4 x half> %a to <4 x i16> | 
|  | 153 | ;  ret <4 x i16> %vcvt.i | 
|  | 154 | ;} | 
|  | 155 |  | 
|  | 156 | ;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) { | 
|  | 157 | ;entry: | 
|  | 158 | ;  %vcvt.i = fptosi <8 x half> %a to <8 x i16> | 
|  | 159 | ;  ret <8 x i16> %vcvt.i | 
|  | 160 | ;} | 
|  | 161 |  | 
|  | 162 | ;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) { | 
|  | 163 | ;entry: | 
|  | 164 | ;  %vcvt.i = fptoui <4 x half> %a to <4 x i16> | 
|  | 165 | ;  ret <4 x i16> %vcvt.i | 
|  | 166 | ;} | 
|  | 167 |  | 
|  | 168 | ;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) { | 
|  | 169 | ;entry: | 
|  | 170 | ;  %vcvt.i = fptoui <8 x half> %a to <8 x i16> | 
|  | 171 | ;  ret <8 x i16> %vcvt.i | 
|  | 172 | ;} | 
|  | 173 |  | 
; Round-and-convert intrinsics (vcvta = to-nearest-away, vcvtm = toward
; minus-infinity, vcvtn = to-nearest-even, vcvtp = toward plus-infinity),
; signed and unsigned results, 64-bit (d) and 128-bit (q) variants.
; These use the correct "; CHECK-LABEL:" spelling.

define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_s16_f16:
; CHECK:         vcvta.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_s16_v1.i
}

define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_u16_f16:
; CHECK:         vcvta.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_s16_f16:
; CHECK:         vcvta.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtaq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_s16_f16:
; CHECK:         vcvtm.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_s16_f16:
; CHECK:         vcvtm.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_u16_f16:
; CHECK:         vcvtm.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_u16_f16:
; CHECK:         vcvtm.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_s16_f16:
; CHECK:         vcvtn.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_s16_f16:
; CHECK:         vcvtn.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_u16_f16:
; CHECK:         vcvtn.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_u16_f16:
; CHECK:         vcvtn.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_s16_f16:
; CHECK:         vcvtp.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_s16_f16:
; CHECK:         vcvtp.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_u16_f16:
; CHECK:         vcvtp.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_u16_f16:
; CHECK:         vcvtp.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_u16_v1.i
}
| Sjoerd Meijer | 590e4e8 | 2018-08-01 14:43:59 +0000 | [diff] [blame] | 308 |  | 
; Unary arithmetic (vneg, vrecpe, vrint*, vrsqrte) and binary
; (vadd, vabd, vacge) fp16 tests.
; Fixed: "; CHECKLABEL:" corrected to the real FileCheck directive
; "; CHECK-LABEL:" so each function's checks are anchored to its label.

define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
; CHECK-LABEL: test_vneg_f16:
; CHECK:         vneg.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vnegq_f16:
; CHECK:         vneg.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <8 x half> %sub.i
}

define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrecpe_f16:
; CHECK:         vrecpe.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
  ret <4 x half> %vrecpe_v1.i
}

define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrecpeq_f16:
; CHECK:         vrecpe.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
  ret <8 x half> %vrecpeq_v1.i
}

define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnd_f16:
; CHECK:         vrintz.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
  ret <4 x half> %vrnd_v1.i
}

define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndq_f16:
; CHECK:         vrintz.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
  ret <8 x half> %vrndq_v1.i
}

define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnda_f16:
; CHECK:         vrinta.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
  ret <4 x half> %vrnda_v1.i
}

define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndaq_f16:
; CHECK:         vrinta.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
  ret <8 x half> %vrndaq_v1.i
}

define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndm_f16:
; CHECK:         vrintm.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
  ret <4 x half> %vrndm_v1.i
}

define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndmq_f16:
; CHECK:         vrintm.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
  ret <8 x half> %vrndmq_v1.i
}

define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndn_f16:
; CHECK:         vrintn.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
  ret <4 x half> %vrndn_v1.i
}

define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK:         vrintn.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
  ret <8 x half> %vrndnq_v1.i
}

define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndp_f16:
; CHECK:         vrintp.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
  ret <4 x half> %vrndp_v1.i
}

define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndpq_f16:
; CHECK:         vrintp.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
  ret <8 x half> %vrndpq_v1.i
}

define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndx_f16:
; CHECK:         vrintx.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
  ret <4 x half> %vrndx_v1.i
}

define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndxq_f16:
; CHECK:         vrintx.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
  ret <8 x half> %vrndxq_v1.i
}

define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrsqrte_f16:
; CHECK:         vrsqrte.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
  ret <4 x half> %vrsqrte_v1.i
}

define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrsqrteq_f16:
; CHECK:         vrsqrte.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
  ret <8 x half> %vrsqrteq_v1.i
}

define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vadd_f16:
; CHECK:         vadd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <4 x half> %a, %b
  ret <4 x half> %add.i
}

define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vaddq_f16:
; CHECK:         vadd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <8 x half> %a, %b
  ret <8 x half> %add.i
}

define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vabd_f16:
; CHECK:         vabd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vabd_v2.i
}

define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vabdq_f16:
; CHECK:         vabd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vabdq_v2.i
}

define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcage_f16:
; CHECK:         vacge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcage_v2.i
}

define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcageq_f16:
; CHECK:         vacge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcageq_v2.i
}
|  | 524 |  | 
|  | 525 | ; FIXME (PR38404) | 
|  | 526 | ; | 
|  | 527 | ;define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 528 | ;entry: | 
|  | 529 | ;  %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b) | 
|  | 530 | ;  ret <4 x i16> %vcagt_v2.i | 
|  | 531 | ;} | 
|  | 532 | ; | 
|  | 533 | ;define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 534 | ;entry: | 
|  | 535 | ;  %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b) | 
|  | 536 | ;  ret <8 x i16> %vcagtq_v2.i | 
|  | 537 | ;} | 
|  | 538 |  | 
; vcale is implemented as vacge with the operands swapped (a <= b  <=>  b >= a).
; Fixed: "; CHECKLABEL:" corrected to the real FileCheck directive
; "; CHECK-LABEL:".

define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcale_f16:
; CHECK:         vacge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
  ret <4 x i16> %vcale_v2.i
}

define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaleq_f16:
; CHECK:         vacge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
  ret <8 x i16> %vcaleq_v2.i
}
|  | 556 |  | 
|  | 557 | ; FIXME (PR38404) | 
|  | 558 | ; | 
|  | 559 | ;define dso_local <4 x i16> @test_vcalt_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 560 | ;entry: | 
|  | 561 | ;  %vcalt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a) | 
|  | 562 | ;  ret <4 x i16> %vcalt_v2.i | 
|  | 563 | ;} | 
|  | 564 |  | 
|  | 565 | ;define dso_local <8 x i16> @test_vcaltq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 566 | ;entry: | 
|  | 567 | ;  %vcaltq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a) | 
|  | 568 | ;  ret <8 x i16> %vcaltq_v2.i | 
|  | 569 | ;} | 
|  | 570 |  | 
; Element-wise fp16 compares (results sign-extended to i16 lanes) and the
; first fixed-point conversion test. vcle/vclt are matched as vcge/vcgt with
; swapped operands.
; Fixed: "; CHECKLABEL:" corrected to the real FileCheck directive
; "; CHECK-LABEL:".

define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vceq_f16:
; CHECK:         vceq.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vceqq_f16:
; CHECK:         vceq.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcge_f16:
; CHECK:         vcge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgeq_f16:
; CHECK:         vcge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcgt_f16:
; CHECK:         vcgt.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgtq_f16:
; CHECK:         vcgt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcle_f16:
; CHECK:         vcge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcleq_f16:
; CHECK:         vcge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vclt_f16:
; CHECK:         vcgt.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcltq_f16:
; CHECK:         vcgt.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_s16:
; CHECK:         vcvt.f16.s16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}
|  | 679 |  | 
|  | 680 | declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2 | 
|  | 681 |  | 
; Quad-register fixed-point signed-int-to-fp16 conversion, #2 fractional bits.
define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK:         vcvt.f16.s16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
ret <8 x half> %vcvt_n1
}
|  | 690 |  | 
|  | 691 | declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2 | 
|  | 692 |  | 
; Fixed-point unsigned-int-to-fp16 conversion with #2 fractional bits.
define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_u16:
; CHECK:         vcvt.f16.u16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
ret <4 x half> %vcvt_n1
}
|  | 701 |  | 
|  | 702 | declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2 | 
|  | 703 |  | 
; Quad-register fixed-point unsigned-int-to-fp16 conversion, #2 fractional bits.
define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK:         vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
ret <8 x half> %vcvt_n1
}
|  | 712 |  | 
|  | 713 | declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2 | 
|  | 714 |  | 
; fp16-to-signed fixed-point conversion with #2 fractional bits.
define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_s16_f16:
; CHECK:         vcvt.s16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
ret <4 x i16> %vcvt_n1
}
|  | 723 |  | 
|  | 724 | declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2 | 
|  | 725 |  | 
; Quad-register fp16-to-signed fixed-point conversion, #2 fractional bits.
define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK:         vcvt.s16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
ret <8 x i16> %vcvt_n1
}
|  | 734 |  | 
|  | 735 | declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2 | 
|  | 736 |  | 
; fp16-to-unsigned fixed-point conversion with #2 fractional bits.
define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_u16_f16:
; CHECK:         vcvt.u16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
ret <4 x i16> %vcvt_n1
}
|  | 745 |  | 
|  | 746 | declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2 | 
|  | 747 |  | 
; Quad-register fp16-to-unsigned fixed-point conversion, #2 fractional bits.
define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK:         vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
%vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
ret <8 x i16> %vcvt_n1
}
|  | 756 |  | 
|  | 757 | declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2 | 
|  | 758 |  | 
; NEON half-precision maximum (vmaxs intrinsic) selects vmax.f16.
define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmax_f16:
; CHECK:         vmax.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vmax_v2.i
}
|  | 767 |  | 
; Quad-register half-precision maximum.
define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxq_f16:
; CHECK:         vmax.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
%vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %vmaxq_v2.i
}
|  | 776 |  | 
|  | 777 | ; FIXME (PR38404) | 
|  | 778 | ; | 
|  | 779 | ;define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 780 | ;entry: | 
|  | 781 | ;  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b) | 
|  | 782 | ;  ret <4 x half> %vmaxnm_v2.i | 
|  | 783 | ;} | 
|  | 784 |  | 
|  | 785 | ;define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 786 | ;entry: | 
|  | 787 | ;  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) | 
|  | 788 | ;  ret <8 x half> %vmaxnmq_v2.i | 
|  | 789 | ;} | 
|  | 790 |  | 
|  | 791 | ;define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 792 | ;entry: | 
|  | 793 | ;  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b) | 
|  | 794 | ;  ret <4 x half> %vmin_v2.i | 
|  | 795 | ;} | 
|  | 796 |  | 
|  | 797 | ;define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 798 | ;entry: | 
|  | 799 | ;  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b) | 
|  | 800 | ;  ret <8 x half> %vminq_v2.i | 
|  | 801 | ;} | 
|  | 802 |  | 
|  | 803 | ;define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 804 | ;entry: | 
|  | 805 | ;  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b) | 
|  | 806 | ;  ret <4 x half> %vminnm_v2.i | 
|  | 807 | ;} | 
|  | 808 |  | 
|  | 809 | ;define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 810 | ;entry: | 
|  | 811 | ;  %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b) | 
|  | 812 | ;  ret <8 x half> %vminnmq_v2.i | 
|  | 813 | ;} | 
|  | 814 |  | 
; Plain IR fmul on <4 x half> selects vmul.f16.
define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmul_f16:
; CHECK:         vmul.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%mul.i = fmul <4 x half> %a, %b
ret <4 x half> %mul.i
}
|  | 823 |  | 
; Quad-register half-precision multiply.
define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmulq_f16:
; CHECK:         vmul.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
%mul.i = fmul <8 x half> %a, %b
ret <8 x half> %mul.i
}
|  | 832 |  | 
; Half-precision pairwise add (D-registers only).
define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpadd_f16:
; CHECK:         vpadd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vpadd_v2.i
}
|  | 841 |  | 
; Half-precision pairwise maximum (D-registers only).
define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmax_f16:
; CHECK:         vpmax.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vpmax_v2.i
}
|  | 850 |  | 
; Half-precision pairwise minimum (D-registers only).
define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmin_f16:
; CHECK:         vpmin.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vpmin_v2.i
}
|  | 859 |  | 
; Half-precision Newton-Raphson reciprocal step.
define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrecps_f16:
; CHECK:         vrecps.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vrecps_v2.i
}
|  | 868 |  | 
; Quad-register half-precision reciprocal step.
define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrecpsq_f16:
; CHECK:         vrecps.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
%vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %vrecpsq_v2.i
}
|  | 877 |  | 
; Half-precision reciprocal square-root step.
define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrsqrts_f16:
; CHECK:         vrsqrts.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %vrsqrts_v2.i
}
|  | 886 |  | 
; Quad-register half-precision reciprocal square-root step.
define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrsqrtsq_f16:
; CHECK:         vrsqrts.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
%vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %vrsqrtsq_v2.i
}
|  | 895 |  | 
; Plain IR fsub on <4 x half> selects vsub.f16.
define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vsub_f16:
; CHECK:         vsub.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
%sub.i = fsub <4 x half> %a, %b
ret <4 x half> %sub.i
}
|  | 904 |  | 
; Quad-register half-precision subtract.
define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vsubq_f16:
; CHECK:         vsub.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
%sub.i = fsub <8 x half> %a, %b
ret <8 x half> %sub.i
}
|  | 913 |  | 
| Sjoerd Meijer | 9b30213 | 2018-08-03 09:12:56 +0000 | [diff] [blame] | 914 | define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { | 
|  | 915 | ; CHECK-LABEL: test_vfma_f16: | 
|  | 916 | ; CHECK:         vfma.f16 d0, d1, d2 | 
|  | 917 | ; CHECK-NEXT:    bx lr | 
|  | 918 | entry: | 
|  | 919 | %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) | 
|  | 920 | ret <4 x half> %0 | 
|  | 921 | } | 
|  | 922 |  | 
; Quad-register fused multiply-add accumulating into %a.
define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmaq_f16:
; CHECK:         vfma.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
%0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
ret <8 x half> %0
}
|  | 931 |  | 
; Fused multiply-subtract: the fsub from -0.0 negates %b (0xH8000 is fp16 -0.0),
; so fma(-b, c, a) is selected as a vneg.f16 followed by vfma.f16 here
; (rather than a single vfms.f16).
define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfms_f16:
; CHECK:         vneg.f16 [[D16:d[0-9]+]], d1
; CHECK-NEXT:    vfma.f16 d0, [[D16]], d2
; CHECK-NEXT:    bx lr
entry:
%sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
%0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
ret <4 x half> %0
}
|  | 942 |  | 
; Quad-register fused multiply-subtract: -0.0 - %b negates %b, then fma,
; currently selected as vneg.f16 + vfma.f16.
define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmsq_f16:
; CHECK:         vneg.f16 [[Q8:q[0-9]+]], q1
; CHECK-NEXT:    vfma.f16 q0, [[Q8]], q2
; CHECK-NEXT:    bx lr
entry:
%sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
%0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
ret <8 x half> %0
}
|  | 953 |  | 
| Sjoerd Meijer | 590e4e8 | 2018-08-01 14:43:59 +0000 | [diff] [blame] | 954 | ; FIXME (PR38404) | 
|  | 955 | ; | 
| Sjoerd Meijer | 590e4e8 | 2018-08-01 14:43:59 +0000 | [diff] [blame] | 956 | ;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 957 | ;entry: | 
|  | 958 | ;  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | 
|  | 959 | ;  %mul = fmul <4 x half> %shuffle, %a | 
|  | 960 | ;  ret <4 x half> %mul | 
|  | 961 | ;} | 
|  | 962 |  | 
|  | 963 | ;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) { | 
|  | 964 | ;entry: | 
|  | 965 | ;  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> | 
|  | 966 | ;  %mul = fmul <8 x half> %shuffle, %a | 
|  | 967 | ;  ret <8 x half> %mul | 
|  | 968 | ;} | 
|  | 969 |  | 
|  | 970 | ;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) { | 
|  | 971 | ;entry: | 
|  | 972 | ;  %0 = bitcast float %b.coerce to i32 | 
|  | 973 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 974 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 975 | ;  %vecinit = insertelement <4 x half> undef, half %1, i32 0 | 
|  | 976 | ;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer | 
|  | 977 | ;  %mul = fmul <4 x half> %vecinit4, %a | 
|  | 978 | ;  ret <4 x half> %mul | 
|  | 979 | ;} | 
|  | 980 |  | 
|  | 981 | ;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) { | 
|  | 982 | ;entry: | 
|  | 983 | ;  %0 = bitcast float %b.coerce to i32 | 
|  | 984 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 985 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 986 | ;  %vecinit = insertelement <8 x half> undef, half %1, i32 0 | 
|  | 987 | ;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer | 
|  | 988 | ;  %mul = fmul <8 x half> %vecinit8, %a | 
|  | 989 | ;  ret <8 x half> %mul | 
|  | 990 | ;} | 
|  | 991 |  | 
; Bitwise select on fp16 data: operands are bitcast to <8 x i8> around vbsl.
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vbsl_f16:
; CHECK:         vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
%0 = bitcast <4 x i16> %a to <8 x i8>
%1 = bitcast <4 x half> %b to <8 x i8>
%2 = bitcast <4 x half> %c to <8 x i8>
%vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
%3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
ret <4 x half> %3
}
|  | 1004 |  | 
; Quad-register bitwise select on fp16 data via <16 x i8> bitcasts.
define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vbslq_f16:
; CHECK:         vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
%0 = bitcast <8 x i16> %a to <16 x i8>
%1 = bitcast <8 x half> %b to <16 x i8>
%2 = bitcast <8 x half> %c to <16 x i8>
%vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
%3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
ret <8 x half> %3
}
|  | 1017 |  | 
|  | 1018 | ; FIXME (PR38404) | 
|  | 1019 | ; | 
|  | 1020 | ;define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 1021 | ;entry: | 
|  | 1022 | ;  %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> | 
|  | 1023 | ;  %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> | 
|  | 1024 | ;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0 | 
|  | 1025 | ;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1 | 
|  | 1026 | ;  ret %struct.float16x4x2_t %.fca.0.1.insert | 
|  | 1027 | ;} | 
|  | 1028 | ; | 
|  | 1029 | ;define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 1030 | ;entry: | 
|  | 1031 | ;  %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> | 
|  | 1032 | ;  %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | 
|  | 1033 | ;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0 | 
|  | 1034 | ;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1 | 
|  | 1035 | ;  ret %struct.float16x8x2_t %.fca.0.1.insert | 
|  | 1036 | ;} | 
|  | 1037 | ; | 
|  | 1038 | ;define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 1039 | ;entry: | 
|  | 1040 | ;  %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | 
|  | 1041 | ;  %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | 
|  | 1042 | ;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0 | 
|  | 1043 | ;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1 | 
|  | 1044 | ;  ret %struct.float16x4x2_t %.fca.0.1.insert | 
|  | 1045 | ;} | 
|  | 1046 | ; | 
|  | 1047 | ;define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 1048 | ;entry: | 
|  | 1049 | ;  %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | 
|  | 1050 | ;  %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | 
|  | 1051 | ;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0 | 
|  | 1052 | ;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1 | 
|  | 1053 | ;  ret %struct.float16x8x2_t %.fca.0.1.insert | 
|  | 1054 | ;} | 
|  | 1055 | ; | 
|  | 1056 | ;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 1057 | ;entry: | 
|  | 1058 | ;  %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> | 
|  | 1059 | ;  %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> | 
|  | 1060 | ;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0 | 
|  | 1061 | ;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1 | 
|  | 1062 | ;  ret %struct.float16x4x2_t %.fca.0.1.insert | 
|  | 1063 | ;} | 
|  | 1064 | ; | 
|  | 1065 | ;define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 1066 | ;entry: | 
|  | 1067 | ;  %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> | 
|  | 1068 | ;  %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> | 
|  | 1069 | ;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0 | 
|  | 1070 | ;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1 | 
|  | 1071 | ;  ret %struct.float16x8x2_t %.fca.0.1.insert | 
|  | 1072 | ;} | 
|  | 1073 | ; | 
|  | 1074 | ;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) { | 
|  | 1075 | ;entry: | 
|  | 1076 | ;  %0 = bitcast float %a.coerce to i32 | 
|  | 1077 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 1078 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 1079 | ;  %vecinit = insertelement <4 x half> undef, half %1, i32 0 | 
|  | 1080 | ;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer | 
|  | 1081 | ;  ret <4 x half> %vecinit4 | 
|  | 1082 | ;} | 
|  | 1083 | ; | 
|  | 1084 | ;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) { | 
|  | 1085 | ;entry: | 
|  | 1086 | ;  %0 = bitcast float %a.coerce to i32 | 
|  | 1087 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 1088 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 1089 | ;  %vecinit = insertelement <8 x half> undef, half %1, i32 0 | 
|  | 1090 | ;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer | 
|  | 1091 | ;  ret <8 x half> %vecinit8 | 
|  | 1092 | ;} | 
|  | 1093 | ; | 
|  | 1094 | ;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) { | 
|  | 1095 | ;entry: | 
|  | 1096 | ;  %0 = bitcast float %a.coerce to i32 | 
|  | 1097 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 1098 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 1099 | ;  %vecinit = insertelement <4 x half> undef, half %1, i32 0 | 
|  | 1100 | ;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer | 
|  | 1101 | ;  ret <4 x half> %vecinit4 | 
|  | 1102 | ;} | 
|  | 1103 | ; | 
|  | 1104 | ;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) { | 
|  | 1105 | ;entry: | 
|  | 1106 | ;  %0 = bitcast float %a.coerce to i32 | 
|  | 1107 | ;  %tmp.0.extract.trunc = trunc i32 %0 to i16 | 
|  | 1108 | ;  %1 = bitcast i16 %tmp.0.extract.trunc to half | 
|  | 1109 | ;  %vecinit = insertelement <8 x half> undef, half %1, i32 0 | 
|  | 1110 | ;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer | 
|  | 1111 | ;  ret <8 x half> %vecinit8 | 
|  | 1112 | ;} | 
|  | 1113 | ; | 
|  | 1114 | ;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) { | 
|  | 1115 | ;entry: | 
|  | 1116 | ;  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | 
|  | 1117 | ;  ret <4 x half> %shuffle | 
|  | 1118 | ;} | 
|  | 1119 | ; | 
|  | 1120 | ;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) { | 
|  | 1121 | ;entry: | 
|  | 1122 | ;  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> | 
|  | 1123 | ;  ret <8 x half> %shuffle | 
|  | 1124 | ;} | 
|  | 1125 | ; | 
|  | 1126 | ;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) { | 
|  | 1127 | ;entry: | 
|  | 1128 | ;  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> | 
|  | 1129 | ;  ret <4 x half> %vext | 
|  | 1130 | ;} | 
|  | 1131 | ; | 
|  | 1132 | ;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) { | 
|  | 1133 | ;entry: | 
|  | 1134 | ;  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> | 
|  | 1135 | ;  ret <8 x half> %vext | 
|  | 1136 | ;} | 
|  | 1137 | ; | 
|  | 1138 | ;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) { | 
|  | 1139 | ;entry: | 
|  | 1140 | ;  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | 
|  | 1141 | ;  ret <4 x half> %shuffle.i | 
|  | 1142 | ;} | 
|  | 1143 | ; | 
|  | 1144 | ;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) { | 
|  | 1145 | ;entry: | 
|  | 1146 | ;  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> | 
|  | 1147 | ;  ret <8 x half> %shuffle.i | 
|  | 1148 | ;} | 
|  | 1149 |  | 
|  | 1150 | declare <4 x half> @llvm.fabs.v4f16(<4 x half>) | 
|  | 1151 | declare <8 x half> @llvm.fabs.v8f16(<8 x half>) | 
|  | 1152 | declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>) | 
|  | 1153 | declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>) | 
|  | 1154 | declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>) | 
|  | 1155 | declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>) | 
|  | 1156 | declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>) | 
|  | 1157 | declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>) | 
|  | 1158 | declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>) | 
|  | 1159 | declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>) | 
|  | 1160 | declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>) | 
|  | 1161 | declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>) | 
|  | 1162 | declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>) | 
|  | 1163 | declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>) | 
|  | 1164 | declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>) | 
|  | 1165 | declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>) | 
|  | 1166 | declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>) | 
|  | 1167 | declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>) | 
|  | 1168 | declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>) | 
|  | 1169 | declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>) | 
|  | 1170 | declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>) | 
|  | 1171 | declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>) | 
|  | 1172 | declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>) | 
|  | 1173 | declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>) | 
|  | 1174 | declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>) | 
|  | 1175 | declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>) | 
|  | 1176 | declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>) | 
|  | 1177 | declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>) | 
|  | 1178 | declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>) | 
|  | 1179 | declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>) | 
|  | 1180 | declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>) | 
|  | 1181 | declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>) | 
|  | 1182 | declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>) | 
|  | 1183 | declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>) | 
|  | 1184 | declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>) | 
|  | 1185 | declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>) | 
|  | 1186 | declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>) | 
|  | 1187 | declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>) | 
|  | 1188 | declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>) | 
|  | 1189 | declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>) | 
|  | 1190 | declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>) | 
|  | 1191 | declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>) | 
|  | 1192 | declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>) | 
|  | 1193 | declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>) | 
|  | 1194 | declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>) | 
|  | 1195 | declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>) | 
|  | 1196 | declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>) | 
|  | 1197 | declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>) | 
|  | 1198 | declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>) | 
|  | 1199 | declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>) | 
|  | 1200 | declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>) | 
|  | 1201 | declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>) | 
|  | 1202 | declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>) | 
|  | 1203 | declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>) | 
|  | 1204 | declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) | 
|  | 1205 | declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) | 
|  | 1206 | declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) | 
|  | 1207 | declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) |