| 1 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s |
| 2 | |
| 3 | ; Check that splitting of constant live ranges does not cause excessive spilling: exactly one "# 16-byte Spill" is expected between the function label and retq. |
| 4 | |
| 5 | ; CHECK-LABEL: PR24139: |
| 6 | ; CHECK: # 16-byte Spill |
| 7 | ; CHECK-NOT: # 16-byte Spill |
| 8 | ; CHECK: retq |
| 9 | |
; @PR24139 applies one and the same straight-line <4 x float> computation to
; each of its three <2 x double> arguments (reinterpreted via bitcast), adds
; the three results together, and returns the sum bitcast back to
; <2 x double>. All three copies use the same constant vectors.
; NOTE(review): the constants look like 2/pi, a hi/lo split of pi/2 and
; polynomial coefficients, i.e. a vectorised sine/cosine-style kernel; that
; reading is an assumption and is not needed to understand the test.
; NOTE(review): the intrinsic calls reference attribute group #2, whose
; definition is not visible in this excerpt; confirm it exists or is optional.
| 10 | define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) { |
; --- Copy 1 of 3: operates on %arg, produces %tmp44 ---
; Scale the input by a constant and add a value built from the input's sign
; bit (mask -2147483648 = 0x80000000) OR-ed with 1056964608 (= 0x3F000000,
; the bit pattern of 0.5f).
| 11 | %tmp = bitcast <2 x double> %arg to <4 x float> |
| 12 | %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000> |
| 13 | %tmp4 = bitcast <2 x double> %arg to <4 x i32> |
| 14 | %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 15 | %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608> |
| 16 | %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float> |
| 17 | %tmp8 = fadd <4 x float> %tmp3, %tmp7 |
; Convert to <4 x i32> (cvttps2dq) and back to <4 x float> (cvtdq2ps). The
; integer result is also kept as %tmp10 for the bit tests further down.
| 18 | %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2 |
| 19 | %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64> |
| 20 | %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2 |
; Subtract two constant multiples of %tmp11 from the input, giving %tmp15.
| 21 | %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000> |
| 22 | %tmp13 = fsub <4 x float> %tmp, %tmp12 |
| 23 | %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000> |
| 24 | %tmp15 = fsub <4 x float> %tmp13, %tmp14 |
; Two interleaved polynomial chains in %tmp16 = %tmp15 * %tmp15:
;   %tmp27 = 1.0 + %tmp16 * (c + %tmp16 * (c + %tmp16 * c))
;   %tmp29 = %tmp15 + (%tmp15 * %tmp16) * (c + %tmp16 * (c + %tmp16 * c))
; where each "c" stands for one of the splat constants below.
| 25 | %tmp16 = fmul <4 x float> %tmp15, %tmp15 |
| 26 | %tmp17 = fmul <4 x float> %tmp15, %tmp16 |
| 27 | %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000> |
| 28 | %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000> |
| 29 | %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000> |
| 30 | %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000> |
| 31 | %tmp22 = fmul <4 x float> %tmp16, %tmp19 |
| 32 | %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000> |
| 33 | %tmp24 = fmul <4 x float> %tmp16, %tmp21 |
| 34 | %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000> |
| 35 | %tmp26 = fmul <4 x float> %tmp16, %tmp23 |
| 36 | %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> |
| 37 | %tmp28 = fmul <4 x float> %tmp17, %tmp25 |
| 38 | %tmp29 = fadd <4 x float> %tmp15, %tmp28 |
; First blend: choose per lane between %tmp27 and %tmp29. The mask is
; all-ones in lanes where bit 0 of the converted integer is clear
; (4294967297 = 0x0000000100000001, i.e. 1 in each i32 lane).
| 39 | %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297> |
| 40 | %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32> |
| 41 | %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer |
| 42 | %tmp33 = sext <4 x i1> %tmp32 to <4 x i32> |
| 43 | %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float> |
| 44 | %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2 |
; Second blend: choose per lane between %tmp35 with its sign bit flipped
; (xor with 0x80000000) and %tmp35 itself. The mask is all-ones in lanes
; where bit 1 of the converted integer is clear
; (8589934594 = 0x0000000200000002, i.e. 2 in each i32 lane).
| 45 | %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594> |
| 46 | %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32> |
| 47 | %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer |
| 48 | %tmp39 = sext <4 x i1> %tmp38 to <4 x i32> |
| 49 | %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32> |
| 50 | %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 51 | %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float> |
| 52 | %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float> |
| 53 | %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2 |
; --- Copy 2 of 3: same sequence and same constants, on %arg1; produces %tmp87 ---
| 54 | %tmp45 = bitcast <2 x double> %arg1 to <4 x float> |
| 55 | %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000> |
| 56 | %tmp47 = bitcast <2 x double> %arg1 to <4 x i32> |
| 57 | %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 58 | %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608> |
| 59 | %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float> |
| 60 | %tmp51 = fadd <4 x float> %tmp46, %tmp50 |
| 61 | %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2 |
| 62 | %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64> |
| 63 | %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2 |
| 64 | %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000> |
| 65 | %tmp56 = fsub <4 x float> %tmp45, %tmp55 |
| 66 | %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000> |
| 67 | %tmp58 = fsub <4 x float> %tmp56, %tmp57 |
| 68 | %tmp59 = fmul <4 x float> %tmp58, %tmp58 |
| 69 | %tmp60 = fmul <4 x float> %tmp58, %tmp59 |
| 70 | %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000> |
| 71 | %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000> |
| 72 | %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000> |
| 73 | %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000> |
| 74 | %tmp65 = fmul <4 x float> %tmp59, %tmp62 |
| 75 | %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000> |
| 76 | %tmp67 = fmul <4 x float> %tmp59, %tmp64 |
| 77 | %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000> |
| 78 | %tmp69 = fmul <4 x float> %tmp59, %tmp66 |
| 79 | %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> |
| 80 | %tmp71 = fmul <4 x float> %tmp60, %tmp68 |
| 81 | %tmp72 = fadd <4 x float> %tmp58, %tmp71 |
| 82 | %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297> |
| 83 | %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32> |
| 84 | %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer |
| 85 | %tmp76 = sext <4 x i1> %tmp75 to <4 x i32> |
| 86 | %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float> |
| 87 | %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2 |
| 88 | %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594> |
| 89 | %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32> |
| 90 | %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer |
| 91 | %tmp82 = sext <4 x i1> %tmp81 to <4 x i32> |
| 92 | %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32> |
| 93 | %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 94 | %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float> |
| 95 | %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float> |
| 96 | %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2 |
; Running sum of the results of copies 1 and 2.
| 97 | %tmp88 = fadd <4 x float> %tmp44, %tmp87 |
; --- Copy 3 of 3: same sequence and same constants, on %arg2; produces %tmp131 ---
| 98 | %tmp89 = bitcast <2 x double> %arg2 to <4 x float> |
| 99 | %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000> |
| 100 | %tmp91 = bitcast <2 x double> %arg2 to <4 x i32> |
| 101 | %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 102 | %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608> |
| 103 | %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float> |
| 104 | %tmp95 = fadd <4 x float> %tmp90, %tmp94 |
| 105 | %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2 |
| 106 | %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64> |
| 107 | %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2 |
| 108 | %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000> |
| 109 | %tmp100 = fsub <4 x float> %tmp89, %tmp99 |
| 110 | %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000> |
| 111 | %tmp102 = fsub <4 x float> %tmp100, %tmp101 |
| 112 | %tmp103 = fmul <4 x float> %tmp102, %tmp102 |
| 113 | %tmp104 = fmul <4 x float> %tmp102, %tmp103 |
| 114 | %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000> |
| 115 | %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000> |
| 116 | %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000> |
| 117 | %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000> |
| 118 | %tmp109 = fmul <4 x float> %tmp103, %tmp106 |
| 119 | %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000> |
| 120 | %tmp111 = fmul <4 x float> %tmp103, %tmp108 |
| 121 | %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000> |
| 122 | %tmp113 = fmul <4 x float> %tmp103, %tmp110 |
| 123 | %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> |
| 124 | %tmp115 = fmul <4 x float> %tmp104, %tmp112 |
| 125 | %tmp116 = fadd <4 x float> %tmp102, %tmp115 |
| 126 | %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297> |
| 127 | %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32> |
| 128 | %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer |
| 129 | %tmp120 = sext <4 x i1> %tmp119 to <4 x i32> |
| 130 | %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float> |
| 131 | %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2 |
| 132 | %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594> |
| 133 | %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32> |
| 134 | %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer |
| 135 | %tmp126 = sext <4 x i1> %tmp125 to <4 x i32> |
| 136 | %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32> |
| 137 | %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> |
| 138 | %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float> |
| 139 | %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float> |
| 140 | %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2 |
; Add the third result to the running sum and return it as <2 x double>.
| 141 | %tmp132 = fadd <4 x float> %tmp88, %tmp131 |
| 142 | %tmp133 = bitcast <4 x float> %tmp132 to <2 x double> |
| 143 | ret <2 x double> %tmp133 |
| 144 | } |
| 145 | |
; Declarations of the x86 SSE2 / SSE4.1 intrinsics called by @PR24139.
| 146 | declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) |
| 147 | declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) |
| 148 | declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) |