; RUN: llc -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
; RUN: llc -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX

; Both invocations share the common prefix; the pwr7 invocation additionally
; enables the -PWR prefix and the a2q invocation the -QPX prefix, which the
; vector tests use for CPU-specific mnemonics and register numbers.

target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
| 5 | |
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. Their destination registers are then used as the source registers of
; the third add.
| 8 | |
; Input shape: ((x0 + x1) + x2) + x3 -- every add takes the running sum as its
; left operand.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # BB#0:
; CHECK:       fadds [[LHS:[0-9]+]], 1, 2
; CHECK:       fadds [[RHS:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %sum01 = fadd float %x0, %x1
  %sum012 = fadd float %sum01, %x2
  %sum0123 = fadd float %sum012, %x3
  ret float %sum0123
}
| 22 | |
; Input shape: (x2 + (x0 + x1)) + x3 -- the second add has its operands
; commuted relative to reassociate_adds1.
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # BB#0:
; CHECK:       fadds [[LHS:[0-9]+]], 1, 2
; CHECK:       fadds [[RHS:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %sum01 = fadd float %x0, %x1
  %sum012 = fadd float %x2, %sum01
  %sum0123 = fadd float %sum012, %x3
  ret float %sum0123
}
| 36 | |
; Input shape: x3 + ((x0 + x1) + x2) -- the third add has its operands
; commuted relative to reassociate_adds1.
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # BB#0:
; CHECK:       fadds [[LHS:[0-9]+]], 1, 2
; CHECK:       fadds [[RHS:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %sum01 = fadd float %x0, %x1
  %sum012 = fadd float %sum01, %x2
  %sum0123 = fadd float %x3, %sum012
  ret float %sum0123
}
| 50 | |
; Input shape: x3 + (x2 + (x0 + x1)) -- both the second and the third add have
; their operands commuted relative to reassociate_adds1.
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # BB#0:
; CHECK:       fadds [[LHS:[0-9]+]], 1, 2
; CHECK:       fadds [[RHS:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %sum01 = fadd float %x0, %x1
  %sum012 = fadd float %x2, %sum01
  %sum0123 = fadd float %x3, %sum012
  ret float %sum0123
}
| 64 | |
; Verify that we reassociate some of these ops. The optimal balanced tree of adds
; is not produced because finding it would cost more compile time.
| 67 | |
; Input shape: a seven-add serial chain over eight arguments, each add taking
; the running sum on the left. The FileCheck variable names below record which
; arguments (by index) each expected partial sum covers.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK:       # BB#0:
; CHECK:       fadds [[S45:[0-9]+]], 5, 6
; CHECK:       fadds [[S01:[0-9]+]], 1, 2
; CHECK:       fadds [[S23:[0-9]+]], 3, 4
; CHECK:       fadds [[S456:[0-9]+]], [[S45]], 7
; CHECK:       fadds [[S0123:[0-9]+]], [[S01]], [[S23]]
; CHECK:       fadds [[S0TO6:[0-9]+]], [[S0123]], [[S456]]
; CHECK:       fadds 1, [[S0TO6]], 8
; CHECK-NEXT:  blr

  %acc1 = fadd float %x0, %x1
  %acc2 = fadd float %acc1, %x2
  %acc3 = fadd float %acc2, %x3
  %acc4 = fadd float %acc3, %x4
  %acc5 = fadd float %acc4, %x5
  %acc6 = fadd float %acc5, %x6
  %acc7 = fadd float %acc6, %x7
  ret float %acc7
}
| 89 | |
; Verify that vector adds are reassociated too.
| 91 | |
; Vector form of ((x0 + x1) + x2) + x3. The a2q run expects qvfadds on
; registers 1-4; the pwr7 run expects xvaddsp on registers 34-37.
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[LHS:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[RHS:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[LHS]], [[RHS]]
; CHECK-PWR:   xvaddsp [[LHS:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[RHS:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %vsum01 = fadd <4 x float> %x0, %x1
  %vsum012 = fadd <4 x float> %vsum01, %x2
  %vsum0123 = fadd <4 x float> %vsum012, %x3
  ret <4 x float> %vsum0123
}
| 108 | |
; Vector form of (x2 + (x0 + x1)) + x3 -- second add commuted. The a2q run
; expects qvfadds on registers 1-4; the pwr7 run expects xvaddsp on 34-37.
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[LHS:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[RHS:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[LHS]], [[RHS]]
; CHECK-PWR:   xvaddsp [[LHS:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[RHS:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %vsum01 = fadd <4 x float> %x0, %x1
  %vsum012 = fadd <4 x float> %x2, %vsum01
  %vsum0123 = fadd <4 x float> %vsum012, %x3
  ret <4 x float> %vsum0123
}
| 125 | |
; Vector form of x3 + ((x0 + x1) + x2) -- third add commuted. The a2q run
; expects qvfadds on registers 1-4; the pwr7 run expects xvaddsp on 34-37.
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[LHS:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[RHS:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[LHS]], [[RHS]]
; CHECK-PWR:   xvaddsp [[LHS:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[RHS:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %vsum01 = fadd <4 x float> %x0, %x1
  %vsum012 = fadd <4 x float> %vsum01, %x2
  %vsum0123 = fadd <4 x float> %x3, %vsum012
  ret <4 x float> %vsum0123
}
| 142 | |
; Vector form of x3 + (x2 + (x0 + x1)) -- second and third adds commuted. The
; a2q run expects qvfadds on registers 1-4; the pwr7 run expects xvaddsp on
; 34-37.
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK:       # BB#0:
; CHECK-QPX:   qvfadds [[LHS:[0-9]+]], 1, 2
; CHECK-QPX:   qvfadds [[RHS:[0-9]+]], 3, 4
; CHECK-QPX:   qvfadds 1, [[LHS]], [[RHS]]
; CHECK-PWR:   xvaddsp [[LHS:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[RHS:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[LHS]], [[RHS]]
; CHECK-NEXT:  blr

  %vsum01 = fadd <4 x float> %x0, %x1
  %vsum012 = fadd <4 x float> %x2, %vsum01
  %vsum0123 = fadd <4 x float> %x3, %vsum012
  ret <4 x float> %vsum0123
}
| 159 | |
; Input shape: x3 + (x2 + (x0 / x1)) -- the same commuted add chain as
; reassociate_adds4, but seeded by a divide instead of an add.
;
; Expectations: x2 + x3 (argument registers 3 and 4) is computed by an add that
; does not depend on the quotient, and the last instruction before the return
; is the add that consumes that partial sum and writes register 1.
;
; NOTE(review): these patterns are modelled on reassociate_adds4 and have not
; been run through llc. They deliberately do not pin how the divide is emitted
; (a direct fdivs versus an expanded sequence under -enable-unsafe-fp-math) nor
; the operand order of the adds. Confirm against real output, then tighten.
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds6:
; CHECK:       # BB#0:
; CHECK:       fadds [[SUM:[0-9]+]], {{3, 4|4, 3}}
; CHECK:       fadds 1, {{.*}}[[SUM]]
; CHECK-NEXT:  blr

  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}
| 166 | |
; Input shape: x3 * (x2 * (x0 / x1)) -- a single-precision multiply chain
; seeded by a divide.
;
; Only label-anchored smoke checks are asserted here: the function is emitted,
; contains a single-precision multiply, and returns.
;
; NOTE(review): the shape of the multiply chain depends on how the divide is
; lowered under -enable-unsafe-fp-math (a direct fdivs versus an expanded
; sequence that itself ends in a multiply), so no reassociated operand pattern
; is pinned. Run llc on this function and replace the smoke checks with the
; exact expected sequence.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_muls1:
; CHECK:       # BB#0:
; CHECK:       fmuls
; CHECK:       blr

  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}
| 173 | |
; Input shape: x3 + (x2 + (x0 / x1)) in double precision -- the double
; counterpart of reassociate_adds6.
;
; Expectations: x2 + x3 (argument registers 3 and 4) is computed by an add that
; does not depend on the quotient, and the last instruction before the return
; is the add that consumes that partial sum and writes register 1. The
; mnemonic alternation accepts either the scalar FPU add or the VSX scalar add.
;
; NOTE(review): these patterns are modelled on the single-precision tests and
; have not been run through llc for either CPU. In particular, confirm which
; add mnemonic each -mcpu actually selects, then tighten per prefix.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-LABEL: reassociate_adds_double:
; CHECK:       # BB#0:
; CHECK:       {{fadd|xsadddp}} [[SUM:[0-9]+]], {{3, 4|4, 3}}
; CHECK:       {{fadd|xsadddp}} 1, {{.*}}[[SUM]]
; CHECK-NEXT:  blr

  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}
| 180 | |
; Input shape: x3 * (x2 * (x0 / x1)) in double precision -- the double
; counterpart of reassociate_muls1.
;
; Only label-anchored smoke checks are asserted here: the function is emitted,
; contains a double-precision multiply (scalar FPU or VSX scalar mnemonic), and
; returns.
;
; NOTE(review): the shape of the multiply chain depends on how the divide is
; lowered under -enable-unsafe-fp-math, so no reassociated operand pattern is
; pinned. Run llc for both CPUs and replace the smoke checks with the exact
; expected sequence.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-LABEL: reassociate_muls_double:
; CHECK:       # BB#0:
; CHECK:       {{fmul|xsmuldp}}
; CHECK:       blr

  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}
| 187 | |
| 188 | |