; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this file was generated by utils/update_llc_test_checks.py, but we can't check NaN types (PR30443),
; so it has been edited to remove the NaN constant comments.
;

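; Every test below exercises some variant of the same underlying lowering:
; absent a fold, copysign keeps the magnitude bits of the first operand and
; the sign bit of the second, then ORs them together. A scalar sketch of
; that expansion on the raw i32 bits (illustrative only; %xbits/%ybits are
; hypothetical bitcasts, not part of this test):
;
;   %mag = and i32 %xbits, 2147483647   ; 0x7FFFFFFF clears the sign bit
;   %sgn = and i32 %ybits, -2147483648  ; 0x80000000 keeps only the sign bit
;   %res = or i32 %mag, %sgn
;
; This is the andps/andps/orps pattern (with the masks loaded from the
; constant pool) that recurs in the check lines below.
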
; copysign(x, c1) -> fabs(x) iff ispos(c1)
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,4.000000e+00,8.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
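
; In the fabs_sgn case the fold fires: fabs guarantees the sign source has
; a cleared sign bit, so copysign(x, fabs(y)) == fabs(x) for any y, and the
; whole operation collapses to a single andps with the magnitude mask. The
; two constant cases above still show the generic and/and/or lowering, with
; the (known) sign bit of the constant extracted at run time. A minimal
; scalar sketch of the identity (hypothetical, not part of the test):
;
;   %a = call float @llvm.fabs.f32(float %y)
;   %r = call float @llvm.copysign.f32(float %x, float %a) ; == fabs(%x)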

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [-0.000000e+00,-2.000000e+00,-4.000000e+00,-8.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT: orps %xmm2, %xmm1
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vorps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
; AVX-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}
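
; fneg(fabs(y)) is materialized above as an OR with the -0.0 sign mask,
; which forces the sign bit set, so the sign source is known negative.
; Per the rule above, the ideal output would be fneg(fabs(x)), i.e. a
; single orps of %x with the sign mask rather than the full and/and/or.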

; copysign(fabs(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}
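
; All three magnitude-side folds above lower to the identical and/and/or
; sequence: the 0x7FFFFFFF magnitude mask discards the first operand's
; sign bit anyway, so a fabs, fneg, or inner copysign applied to the
; magnitude argument is dead and the combine can strip it.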

; copysign(x, copysign(y,z)) -> copysign(x, z)
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm2
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
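
; Dually, only the sign bit of the sign operand survives the 0x80000000
; mask, so the sign of copysign(y,z) is just the sign of z: the checks
; above mask %xmm2 (z) directly and %y is never read.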

; copysign(x, fp_extend(y)) -> copysign(x, y)
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: cvtss2sd %xmm2, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: movaps %xmm2, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps {{.*#+}} xmm7
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: andps %xmm7, %xmm2
; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: xorps %xmm4, %xmm4
; SSE-NEXT: cvtss2sd %xmm5, %xmm4
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm0, %xmm4
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: cvtss2sd %xmm3, %xmm3
; SSE-NEXT: andps %xmm8, %xmm3
; SSE-NEXT: orps %xmm0, %xmm3
; SSE-NEXT: andps %xmm7, %xmm1
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtss2sd %xmm6, %xmm0
; SSE-NEXT: andps %xmm8, %xmm0
; SSE-NEXT: orps %xmm0, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}
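
; Only the sign bit of the extended value is consumed, and fpext preserves
; the sign, so the fp_extend is redundant for the sign computation
; (ISD::FCOPYSIGN tolerates a sign operand of a different floating-point
; type, so the combine can take the sign straight from the v4f32 %y). The
; AVX checks above still pay for the full vcvtps2pd widening before the
; masks are applied.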

; copysign(x, fp_round(y)) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: movaps {{.*#+}} xmm5
; SSE-NEXT: andps %xmm5, %xmm0
; SSE-NEXT: cvtsd2ss %xmm1, %xmm6
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT: andps %xmm4, %xmm6
; SSE-NEXT: orps %xmm6, %xmm0
; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT: andps %xmm5, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtsd2ss %xmm1, %xmm1
; SSE-NEXT: andps %xmm4, %xmm1
; SSE-NEXT: orps %xmm6, %xmm1
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: andps %xmm5, %xmm1
; SSE-NEXT: xorps %xmm6, %xmm6
; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
; SSE-NEXT: andps %xmm4, %xmm6
; SSE-NEXT: orps %xmm1, %xmm6
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: andps %xmm5, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsd2ss %xmm2, %xmm1
; SSE-NEXT: andps %xmm4, %xmm1
; SSE-NEXT: orps %xmm3, %xmm1
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
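
; The fp_round case mirrors fp_extend: fptrunc preserves the sign bit, so
; the conversion is dead once only the sign is consumed. Note the
; vzeroupper before retq in the AVX checks: vcvtpd2ps reads a 256-bit ymm
; source, so the upper lanes are zeroed on exit to avoid AVX/SSE
; transition penalties.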

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)