; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}