; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # BB#0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # BB#0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}