Gabor Buella | 890e363 | 2018-05-30 15:25:16 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK |
| 3 | |
; Packed single-precision (4 x float) fused multiply-add: a single
; llvm.fma.v4f32 call on %a, %b, %c.
; The CHECK lines expect exactly one vfmadd213ps followed by retq.
| 4 | define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 5 | ; CHECK-LABEL: test_mm_fmadd_ps: |
| 6 | ; CHECK: # %bb.0: # %entry |
| 7 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 8 | ; CHECK-NEXT: retq |
| 9 | entry: |
| 10 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 |
| 11 | ret <4 x float> %0 |
| 12 | } |
| 13 | |
; Packed double-precision (2 x double) fused multiply-add: a single
; llvm.fma.v2f64 call on %a, %b, %c.
; The CHECK lines expect exactly one vfmadd213pd followed by retq.
| 14 | define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 15 | ; CHECK-LABEL: test_mm_fmadd_pd: |
| 16 | ; CHECK: # %bb.0: # %entry |
| 17 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 18 | ; CHECK-NEXT: retq |
| 19 | entry: |
| 20 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2 |
| 21 | ret <2 x double> %0 |
| 22 | } |
| 23 | |
; Scalar single-precision fused multiply-add on element 0 only: extracts
; lane 0 of %a, %b and %c, calls llvm.fma.f32, and inserts the result back
; into lane 0 of %a (the other lanes of %a pass through unchanged).
; The CHECK lines expect the whole sequence to become one vfmadd213ss.
| 24 | define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 25 | ; CHECK-LABEL: test_mm_fmadd_ss: |
| 26 | ; CHECK: # %bb.0: # %entry |
| 27 | ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 28 | ; CHECK-NEXT: retq |
| 29 | entry: |
| 30 | %0 = extractelement <4 x float> %a, i64 0 |
| 31 | %1 = extractelement <4 x float> %b, i64 0 |
| 32 | %2 = extractelement <4 x float> %c, i64 0 |
| 33 | %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2 |
| 34 | %4 = insertelement <4 x float> %a, float %3, i64 0 |
| 35 | ret <4 x float> %4 |
| 36 | } |
| 37 | |
; Scalar double-precision fused multiply-add on element 0 only: extracts
; lane 0 of %a, %b and %c, calls llvm.fma.f64, and inserts the result back
; into lane 0 of %a (the other lane of %a passes through unchanged).
; The CHECK lines expect the whole sequence to become one vfmadd213sd.
| 38 | define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 39 | ; CHECK-LABEL: test_mm_fmadd_sd: |
| 40 | ; CHECK: # %bb.0: # %entry |
| 41 | ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 42 | ; CHECK-NEXT: retq |
| 43 | entry: |
| 44 | %0 = extractelement <2 x double> %a, i64 0 |
| 45 | %1 = extractelement <2 x double> %b, i64 0 |
| 46 | %2 = extractelement <2 x double> %c, i64 0 |
| 47 | %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2 |
| 48 | %4 = insertelement <2 x double> %a, double %3, i64 0 |
| 49 | ret <2 x double> %4 |
| 50 | } |
| 51 | |
; Packed single-precision multiply-subtract: %c is negated with an
; fsub from a -0.0 splat and the result is used as the addend of
; llvm.fma.v4f32.
; The CHECK lines expect the negation to remain a separate vxorps against a
; RIP-relative constant, followed by a plain vfmadd213ps (no vfmsub here).
| 52 | define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 53 | ; CHECK-LABEL: test_mm_fmsub_ps: |
| 54 | ; CHECK: # %bb.0: # %entry |
| 55 | ; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 |
| 56 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 57 | ; CHECK-NEXT: retq |
| 58 | entry: |
| 59 | %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 60 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2 |
| 61 | ret <4 x float> %0 |
| 62 | } |
| 63 | |
; Packed double-precision multiply-subtract: %c is negated with an
; fsub from a -0.0 splat and the result is used as the addend of
; llvm.fma.v2f64.
; The CHECK lines expect the negation to remain a separate vxorpd against a
; RIP-relative constant, followed by a plain vfmadd213pd (no vfmsub here).
| 64 | define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 65 | ; CHECK-LABEL: test_mm_fmsub_pd: |
| 66 | ; CHECK: # %bb.0: # %entry |
| 67 | ; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm2 |
| 68 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 69 | ; CHECK-NEXT: retq |
| 70 | entry: |
| 71 | %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c |
| 72 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2 |
| 73 | ret <2 x double> %0 |
| 74 | } |
| 75 | |
; Scalar single-precision multiply-subtract on element 0: lane 0 of %c is
; negated (fsub from -0.0) and used as the addend of llvm.fma.f32; the
; result is inserted back into lane 0 of %a.
; Unlike the packed fmsub tests, the CHECK lines expect the negation to be
; folded into a single vfmsub213ss.
| 76 | define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 77 | ; CHECK-LABEL: test_mm_fmsub_ss: |
| 78 | ; CHECK: # %bb.0: # %entry |
| 79 | ; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 |
| 80 | ; CHECK-NEXT: retq |
| 81 | entry: |
| 82 | %0 = extractelement <4 x float> %a, i64 0 |
| 83 | %1 = extractelement <4 x float> %b, i64 0 |
| 84 | %.rhs.i = extractelement <4 x float> %c, i64 0 |
| 85 | %2 = fsub float -0.000000e+00, %.rhs.i |
| 86 | %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2 |
| 87 | %4 = insertelement <4 x float> %a, float %3, i64 0 |
| 88 | ret <4 x float> %4 |
| 89 | } |
| 90 | |
; Scalar double-precision multiply-subtract on element 0: lane 0 of %c is
; negated (fsub from -0.0) and used as the addend of llvm.fma.f64; the
; result is inserted back into lane 0 of %a.
; Unlike the packed fmsub tests, the CHECK lines expect the negation to be
; folded into a single vfmsub213sd.
| 91 | define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 92 | ; CHECK-LABEL: test_mm_fmsub_sd: |
| 93 | ; CHECK: # %bb.0: # %entry |
| 94 | ; CHECK-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 |
| 95 | ; CHECK-NEXT: retq |
| 96 | entry: |
| 97 | %0 = extractelement <2 x double> %a, i64 0 |
| 98 | %1 = extractelement <2 x double> %b, i64 0 |
| 99 | %.rhs.i = extractelement <2 x double> %c, i64 0 |
| 100 | %2 = fsub double -0.000000e+00, %.rhs.i |
| 101 | %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2 |
| 102 | %4 = insertelement <2 x double> %a, double %3, i64 0 |
| 103 | ret <2 x double> %4 |
| 104 | } |
| 105 | |
; Packed single-precision negated multiply-add: %a (the first multiplicand)
; is negated with an fsub from a -0.0 splat before llvm.fma.v4f32.
; The CHECK lines expect a separate vxorps on xmm0 against a RIP-relative
; constant, followed by a plain vfmadd213ps (no vfnmadd here).
| 106 | define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 107 | ; CHECK-LABEL: test_mm_fnmadd_ps: |
| 108 | ; CHECK: # %bb.0: # %entry |
| 109 | ; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0 |
| 110 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 111 | ; CHECK-NEXT: retq |
| 112 | entry: |
| 113 | %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| 114 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) #2 |
| 115 | ret <4 x float> %0 |
| 116 | } |
| 117 | |
; Packed double-precision negated multiply-add: %a (the first multiplicand)
; is negated with an fsub from a -0.0 splat before llvm.fma.v2f64.
; The CHECK lines expect a separate vxorpd on xmm0 against a RIP-relative
; constant, followed by a plain vfmadd213pd (no vfnmadd here).
| 118 | define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 119 | ; CHECK-LABEL: test_mm_fnmadd_pd: |
| 120 | ; CHECK: # %bb.0: # %entry |
| 121 | ; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0 |
| 122 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 |
| 123 | ; CHECK-NEXT: retq |
| 124 | entry: |
| 125 | %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a |
| 126 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) #2 |
| 127 | ret <2 x double> %0 |
| 128 | } |
| 129 | |
; Scalar single-precision negated multiply-add on element 0. Note that here
; it is lane 0 of %b (the second multiplicand) that is negated, whereas the
; packed fnmadd tests negate %a. The result is inserted into lane 0 of %a.
; The CHECK lines expect the negation to be folded into one vfnmadd213ss.
| 130 | define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 131 | ; CHECK-LABEL: test_mm_fnmadd_ss: |
| 132 | ; CHECK: # %bb.0: # %entry |
| 133 | ; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 |
| 134 | ; CHECK-NEXT: retq |
| 135 | entry: |
| 136 | %0 = extractelement <4 x float> %a, i64 0 |
| 137 | %.rhs.i = extractelement <4 x float> %b, i64 0 |
| 138 | %1 = fsub float -0.000000e+00, %.rhs.i |
| 139 | %2 = extractelement <4 x float> %c, i64 0 |
| 140 | %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2 |
| 141 | %4 = insertelement <4 x float> %a, float %3, i64 0 |
| 142 | ret <4 x float> %4 |
| 143 | } |
| 144 | |
; Scalar double-precision negated multiply-add on element 0. Note that here
; it is lane 0 of %b (the second multiplicand) that is negated, whereas the
; packed fnmadd tests negate %a. The result is inserted into lane 0 of %a.
; The CHECK lines expect the negation to be folded into one vfnmadd213sd.
| 145 | define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 146 | ; CHECK-LABEL: test_mm_fnmadd_sd: |
| 147 | ; CHECK: # %bb.0: # %entry |
| 148 | ; CHECK-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 |
| 149 | ; CHECK-NEXT: retq |
| 150 | entry: |
| 151 | %0 = extractelement <2 x double> %a, i64 0 |
| 152 | %.rhs.i = extractelement <2 x double> %b, i64 0 |
| 153 | %1 = fsub double -0.000000e+00, %.rhs.i |
| 154 | %2 = extractelement <2 x double> %c, i64 0 |
| 155 | %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2 |
| 156 | %4 = insertelement <2 x double> %a, double %3, i64 0 |
| 157 | ret <2 x double> %4 |
| 158 | } |
| 159 | |
; Packed single-precision negated multiply-subtract: both %a and %c are
; negated (fsub from a -0.0 splat) before llvm.fma.v4f32.
; The CHECK lines expect the -0.0 splat to be loaded once into xmm3, two
; vxorps instructions (one per negated operand), and then a vfmadd231ps;
; neither negation is folded into the FMA opcode.
| 160 | define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 161 | ; CHECK-LABEL: test_mm_fnmsub_ps: |
| 162 | ; CHECK: # %bb.0: # %entry |
| 163 | ; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] |
| 164 | ; CHECK-NEXT: vxorps %xmm3, %xmm0, %xmm4 |
| 165 | ; CHECK-NEXT: vxorps %xmm3, %xmm2, %xmm0 |
| 166 | ; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0 |
| 167 | ; CHECK-NEXT: retq |
| 168 | entry: |
| 169 | %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| 170 | %sub1.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 171 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub1.i) #2 |
| 172 | ret <4 x float> %0 |
| 173 | } |
| 174 | |
; Packed double-precision negated multiply-subtract: both %a and %c are
; negated (fsub from a -0.0 splat) before llvm.fma.v2f64.
; The CHECK lines expect the -0.0 splat to be loaded once into xmm3, two
; vxorpd instructions (one per negated operand), and then a vfmadd231pd;
; neither negation is folded into the FMA opcode.
| 175 | define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 176 | ; CHECK-LABEL: test_mm_fnmsub_pd: |
| 177 | ; CHECK: # %bb.0: # %entry |
| 178 | ; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00] |
| 179 | ; CHECK-NEXT: vxorpd %xmm3, %xmm0, %xmm4 |
| 180 | ; CHECK-NEXT: vxorpd %xmm3, %xmm2, %xmm0 |
| 181 | ; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0 |
| 182 | ; CHECK-NEXT: retq |
| 183 | entry: |
| 184 | %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a |
| 185 | %sub1.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c |
| 186 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub1.i) #2 |
| 187 | ret <2 x double> %0 |
| 188 | } |
| 189 | |
; Scalar single-precision negated multiply-subtract on element 0: lane 0 of
; %b and lane 0 of %c are both negated (fsub from -0.0) before llvm.fma.f32;
; the result is inserted back into lane 0 of %a.
; The CHECK lines expect both negations to be folded into one vfnmsub213ss.
| 190 | define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 191 | ; CHECK-LABEL: test_mm_fnmsub_ss: |
| 192 | ; CHECK: # %bb.0: # %entry |
| 193 | ; CHECK-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 |
| 194 | ; CHECK-NEXT: retq |
| 195 | entry: |
| 196 | %0 = extractelement <4 x float> %a, i64 0 |
| 197 | %.rhs.i = extractelement <4 x float> %b, i64 0 |
| 198 | %1 = fsub float -0.000000e+00, %.rhs.i |
| 199 | %.rhs2.i = extractelement <4 x float> %c, i64 0 |
| 200 | %2 = fsub float -0.000000e+00, %.rhs2.i |
| 201 | %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2 |
| 202 | %4 = insertelement <4 x float> %a, float %3, i64 0 |
| 203 | ret <4 x float> %4 |
| 204 | } |
| 205 | |
; Scalar double-precision negated multiply-subtract on element 0: lane 0 of
; %b and lane 0 of %c are both negated (fsub from -0.0) before llvm.fma.f64;
; the result is inserted back into lane 0 of %a.
; The CHECK lines expect both negations to be folded into one vfnmsub213sd.
| 206 | define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 207 | ; CHECK-LABEL: test_mm_fnmsub_sd: |
| 208 | ; CHECK: # %bb.0: # %entry |
| 209 | ; CHECK-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 |
| 210 | ; CHECK-NEXT: retq |
| 211 | entry: |
| 212 | %0 = extractelement <2 x double> %a, i64 0 |
| 213 | %.rhs.i = extractelement <2 x double> %b, i64 0 |
| 214 | %1 = fsub double -0.000000e+00, %.rhs.i |
| 215 | %.rhs2.i = extractelement <2 x double> %c, i64 0 |
| 216 | %2 = fsub double -0.000000e+00, %.rhs2.i |
| 217 | %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2 |
| 218 | %4 = insertelement <2 x double> %a, double %3, i64 0 |
| 219 | ret <2 x double> %4 |
| 220 | } |
| 221 | |
; Packed single-precision alternating subtract/add: computes fma(a, b, c)
; and fma(a, b, -c), then a shufflevector takes the even lanes (0, 2) from
; the negated-addend result and the odd lanes (mask 5, 7) from the plain one.
; The CHECK lines expect the two FMAs plus the blend to collapse into a
; single vfmaddsub213ps.
| 222 | define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 223 | ; CHECK-LABEL: test_mm_fmaddsub_ps: |
| 224 | ; CHECK: # %bb.0: # %entry |
| 225 | ; CHECK-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 |
| 226 | ; CHECK-NEXT: retq |
| 227 | entry: |
| 228 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 |
| 229 | %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 230 | %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %1) #2 |
| 231 | %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| 232 | ret <4 x float> %3 |
| 233 | } |
| 234 | |
; Packed double-precision alternating subtract/add: computes fma(a, b, c)
; and fma(a, b, -c), then a shufflevector takes lane 0 from the
; negated-addend result and lane 1 (mask 3) from the plain one.
; The CHECK lines expect the two FMAs plus the blend to collapse into a
; single vfmaddsub213pd.
| 235 | define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 236 | ; CHECK-LABEL: test_mm_fmaddsub_pd: |
| 237 | ; CHECK: # %bb.0: # %entry |
| 238 | ; CHECK-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 |
| 239 | ; CHECK-NEXT: retq |
| 240 | entry: |
| 241 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2 |
| 242 | %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c |
| 243 | %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %1) #2 |
| 244 | %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> <i32 0, i32 3> |
| 245 | ret <2 x double> %3 |
| 246 | } |
| 247 | |
; Packed single-precision alternating add/subtract: computes fma(a, b, -c)
; and fma(a, b, c), then a shufflevector takes the even lanes (0, 2) from
; the plain result and the odd lanes (mask 5, 7) from the negated-addend one.
; The CHECK lines expect the two FMAs plus the blend to collapse into a
; single vfmsubadd213ps.
| 248 | define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { |
| 249 | ; CHECK-LABEL: test_mm_fmsubadd_ps: |
| 250 | ; CHECK: # %bb.0: # %entry |
| 251 | ; CHECK-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 |
| 252 | ; CHECK-NEXT: retq |
| 253 | entry: |
| 254 | %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 255 | %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2 |
| 256 | %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 |
| 257 | %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| 258 | ret <4 x float> %2 |
| 259 | } |
| 260 | |
; Packed double-precision alternating add/subtract: computes fma(a, b, -c)
; and fma(a, b, c), then a shufflevector takes lane 0 from the plain result
; and lane 1 (mask 3) from the negated-addend one.
; The CHECK lines expect the two FMAs plus the blend to collapse into a
; single vfmsubadd213pd.
| 261 | define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { |
| 262 | ; CHECK-LABEL: test_mm_fmsubadd_pd: |
| 263 | ; CHECK: # %bb.0: # %entry |
| 264 | ; CHECK-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 |
| 265 | ; CHECK-NEXT: retq |
| 266 | entry: |
| 267 | %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c |
| 268 | %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2 |
| 269 | %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2 |
| 270 | %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> <i32 0, i32 3> |
| 271 | ret <2 x double> %2 |
| 272 | } |
| 273 | |
; 256-bit packed single-precision (8 x float) fused multiply-add: a single
; llvm.fma.v8f32 call on %a, %b, %c.
; The CHECK lines expect exactly one vfmadd213ps on ymm registers.
| 274 | define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 275 | ; CHECK-LABEL: test_mm256_fmadd_ps: |
| 276 | ; CHECK: # %bb.0: # %entry |
| 277 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 278 | ; CHECK-NEXT: retq |
| 279 | entry: |
| 280 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 |
| 281 | ret <8 x float> %0 |
| 282 | } |
| 283 | |
; 256-bit packed double-precision (4 x double) fused multiply-add: a single
; llvm.fma.v4f64 call on %a, %b, %c.
; The CHECK lines expect exactly one vfmadd213pd on ymm registers.
| 284 | define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 285 | ; CHECK-LABEL: test_mm256_fmadd_pd: |
| 286 | ; CHECK: # %bb.0: # %entry |
| 287 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 288 | ; CHECK-NEXT: retq |
| 289 | entry: |
| 290 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2 |
| 291 | ret <4 x double> %0 |
| 292 | } |
| 293 | |
; 256-bit packed single-precision multiply-subtract: %c is negated with an
; fsub from a -0.0 splat and used as the addend of llvm.fma.v8f32.
; The CHECK lines expect a separate vxorps on ymm2 against a RIP-relative
; constant, followed by a plain vfmadd213ps (no vfmsub here).
| 294 | define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 295 | ; CHECK-LABEL: test_mm256_fmsub_ps: |
| 296 | ; CHECK: # %bb.0: # %entry |
| 297 | ; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm2, %ymm2 |
| 298 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 299 | ; CHECK-NEXT: retq |
| 300 | entry: |
| 301 | %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 302 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2 |
| 303 | ret <8 x float> %0 |
| 304 | } |
| 305 | |
; 256-bit packed double-precision multiply-subtract: %c is negated with an
; fsub from a -0.0 splat and used as the addend of llvm.fma.v4f64.
; The CHECK lines expect a separate vxorpd on ymm2 against a RIP-relative
; constant, followed by a plain vfmadd213pd (no vfmsub here).
| 306 | define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 307 | ; CHECK-LABEL: test_mm256_fmsub_pd: |
| 308 | ; CHECK: # %bb.0: # %entry |
| 309 | ; CHECK-NEXT: vxorpd {{.*}}(%rip), %ymm2, %ymm2 |
| 310 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 311 | ; CHECK-NEXT: retq |
| 312 | entry: |
| 313 | %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c |
| 314 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2 |
| 315 | ret <4 x double> %0 |
| 316 | } |
| 317 | |
; 256-bit packed single-precision negated multiply-add: %a (the first
; multiplicand) is negated with an fsub from a -0.0 splat before
; llvm.fma.v8f32.
; The CHECK lines expect a separate vxorps on ymm0 against a RIP-relative
; constant, followed by a plain vfmadd213ps (no vfnmadd here).
| 318 | define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 319 | ; CHECK-LABEL: test_mm256_fnmadd_ps: |
| 320 | ; CHECK: # %bb.0: # %entry |
| 321 | ; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0 |
| 322 | ; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 323 | ; CHECK-NEXT: retq |
| 324 | entry: |
| 325 | %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| 326 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %c) #2 |
| 327 | ret <8 x float> %0 |
| 328 | } |
| 329 | |
; 256-bit packed double-precision negated multiply-add: %a (the first
; multiplicand) is negated with an fsub from a -0.0 splat before
; llvm.fma.v4f64.
; The CHECK lines expect a separate vxorpd on ymm0 against a RIP-relative
; constant, followed by a plain vfmadd213pd (no vfnmadd here).
| 330 | define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 331 | ; CHECK-LABEL: test_mm256_fnmadd_pd: |
| 332 | ; CHECK: # %bb.0: # %entry |
| 333 | ; CHECK-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 |
| 334 | ; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 |
| 335 | ; CHECK-NEXT: retq |
| 336 | entry: |
| 337 | %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a |
| 338 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %c) #2 |
| 339 | ret <4 x double> %0 |
| 340 | } |
| 341 | |
; 256-bit packed single-precision negated multiply-subtract: both %a and %c
; are negated (fsub from a -0.0 splat) before llvm.fma.v8f32.
; The CHECK lines expect the -0.0 splat to be loaded once into ymm3, two
; vxorps instructions (one per negated operand), and then a vfmadd231ps;
; neither negation is folded into the FMA opcode.
| 342 | define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 343 | ; CHECK-LABEL: test_mm256_fnmsub_ps: |
| 344 | ; CHECK: # %bb.0: # %entry |
| 345 | ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] |
| 346 | ; CHECK-NEXT: vxorps %ymm3, %ymm0, %ymm4 |
| 347 | ; CHECK-NEXT: vxorps %ymm3, %ymm2, %ymm0 |
| 348 | ; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0 |
| 349 | ; CHECK-NEXT: retq |
| 350 | entry: |
| 351 | %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| 352 | %sub1.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 353 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %sub1.i) #2 |
| 354 | ret <8 x float> %0 |
| 355 | } |
| 356 | |
; 256-bit packed double-precision negated multiply-subtract: both %a and %c
; are negated (fsub from a -0.0 splat) before llvm.fma.v4f64.
; The CHECK lines expect the -0.0 splat to be loaded once into ymm3, two
; vxorpd instructions (one per negated operand), and then a vfmadd231pd;
; neither negation is folded into the FMA opcode.
| 357 | define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 358 | ; CHECK-LABEL: test_mm256_fnmsub_pd: |
| 359 | ; CHECK: # %bb.0: # %entry |
| 360 | ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] |
| 361 | ; CHECK-NEXT: vxorpd %ymm3, %ymm0, %ymm4 |
| 362 | ; CHECK-NEXT: vxorpd %ymm3, %ymm2, %ymm0 |
| 363 | ; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0 |
| 364 | ; CHECK-NEXT: retq |
| 365 | entry: |
| 366 | %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a |
| 367 | %sub1.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c |
| 368 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %sub1.i) #2 |
| 369 | ret <4 x double> %0 |
| 370 | } |
| 371 | |
; 256-bit packed single-precision alternating subtract/add: computes
; fma(a, b, c) and fma(a, b, -c), then a shufflevector takes the even lanes
; (0, 2, 4, 6) from the negated-addend result and the odd lanes
; (mask 9, 11, 13, 15) from the plain one.
; The CHECK lines expect everything to collapse into one vfmaddsub213ps.
| 372 | define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 373 | ; CHECK-LABEL: test_mm256_fmaddsub_ps: |
| 374 | ; CHECK: # %bb.0: # %entry |
| 375 | ; CHECK-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 |
| 376 | ; CHECK-NEXT: retq |
| 377 | entry: |
| 378 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 |
| 379 | %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 380 | %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %1) #2 |
| 381 | %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| 382 | ret <8 x float> %3 |
| 383 | } |
| 384 | |
; 256-bit packed double-precision alternating subtract/add: computes
; fma(a, b, c) and fma(a, b, -c), then a shufflevector takes the even lanes
; (0, 2) from the negated-addend result and the odd lanes (mask 5, 7) from
; the plain one.
; The CHECK lines expect everything to collapse into one vfmaddsub213pd.
| 385 | define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 386 | ; CHECK-LABEL: test_mm256_fmaddsub_pd: |
| 387 | ; CHECK: # %bb.0: # %entry |
| 388 | ; CHECK-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 |
| 389 | ; CHECK-NEXT: retq |
| 390 | entry: |
| 391 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2 |
| 392 | %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c |
| 393 | %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %1) #2 |
| 394 | %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| 395 | ret <4 x double> %3 |
| 396 | } |
| 397 | |
; 256-bit packed single-precision alternating add/subtract: computes
; fma(a, b, -c) and fma(a, b, c), then a shufflevector takes the even lanes
; (0, 2, 4, 6) from the plain result and the odd lanes (mask 9, 11, 13, 15)
; from the negated-addend one.
; The CHECK lines expect everything to collapse into one vfmsubadd213ps.
| 398 | define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { |
| 399 | ; CHECK-LABEL: test_mm256_fmsubadd_ps: |
| 400 | ; CHECK: # %bb.0: # %entry |
| 401 | ; CHECK-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 |
| 402 | ; CHECK-NEXT: retq |
| 403 | entry: |
| 404 | %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c |
| 405 | %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2 |
| 406 | %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 |
| 407 | %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| 408 | ret <8 x float> %2 |
| 409 | } |
| 410 | |
; 256-bit packed double-precision alternating add/subtract: computes
; fma(a, b, -c) and fma(a, b, c), then a shufflevector takes the even lanes
; (0, 2) from the plain result and the odd lanes (mask 5, 7) from the
; negated-addend one.
; The CHECK lines expect everything to collapse into one vfmsubadd213pd.
| 411 | define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { |
| 412 | ; CHECK-LABEL: test_mm256_fmsubadd_pd: |
| 413 | ; CHECK: # %bb.0: # %entry |
| 414 | ; CHECK-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 |
| 415 | ; CHECK-NEXT: retq |
| 416 | entry: |
| 417 | %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c |
| 418 | %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2 |
| 419 | %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2 |
| 420 | %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| 421 | ret <4 x double> %2 |
| 422 | } |
| 423 | |
; Declarations of the llvm.fma overloads called by the tests above: scalar
; f32/f64, 128-bit v4f32/v2f64 and 256-bit v8f32/v4f64.
; NOTE(review): the declarations reference attribute group #1 and the call
; sites reference #2, but no `attributes #N = { ... }` definition is visible
; in this portion of the file - confirm they are defined (or intentionally
; omitted) in the complete file.
| 424 | declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 |
| 425 | declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1 |
| 426 | declare float @llvm.fma.f32(float, float, float) #1 |
| 427 | declare double @llvm.fma.f64(double, double, double) #1 |
| 428 | declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1 |
| 429 | declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1 |