; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s

; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
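;
; Each function below multiplies A by B, computes both (A*B)-C and (A*B)+C,
; and interleaves the two results with a shufflevector that takes even lanes
; from the FADD and odd lanes from the FSUB, the per-lane pattern a single
; VFMSUBADD instruction computes. Note that the CHECK lines currently show
; the unfused sequence (mul, sub, add, blend), i.e. at this revision the
; FMSUBADD combine does not yet fire for these patterns.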

define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256: # BB#0: # %entry
; FMA3_256-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; FMA3_256-NEXT: vsubpd %xmm2, %xmm0, %xmm1
; FMA3_256-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; FMA3_256-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512: # BB#0: # %entry
; FMA3_512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; FMA3_512-NEXT: vsubpd %xmm2, %xmm0, %xmm1
; FMA3_512-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; FMA3_512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vsubpd %xmm2, %xmm0, %xmm1
; FMA4-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA4-NEXT: retq
entry:
  %AB = fmul <2 x double> %A, %B
  %Sub = fsub <2 x double> %AB, %C
  %Add = fadd <2 x double> %AB, %C
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}
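
; The shuffle mask <0, 3> above selects element 0 from %Add and element 1 from
; %Sub, i.e. add in the even lane and subtract in the odd lane, which matches
; what VFMSUBADDPD computes per lane. The wider tests below repeat the same
; interleave pattern at each vector width.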

define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3: # BB#0: # %entry
; FMA3-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA3-NEXT: vsubps %xmm2, %xmm0, %xmm1
; FMA3-NEXT: vaddps %xmm2, %xmm0, %xmm0
; FMA3-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vsubps %xmm2, %xmm0, %xmm1
; FMA4-NEXT: vaddps %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; FMA4-NEXT: retq
entry:
  %AB = fmul <4 x float> %A, %B
  %Sub = fsub <4 x float> %AB, %C
  %Add = fadd <4 x float> %AB, %C
  %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %subadd
}

define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3: # BB#0: # %entry
; FMA3-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA3-NEXT: vsubpd %ymm2, %ymm0, %ymm1
; FMA3-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; FMA3-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vsubpd %ymm2, %ymm0, %ymm1
; FMA4-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; FMA4-NEXT: retq
entry:
  %AB = fmul <4 x double> %A, %B
  %Sub = fsub <4 x double> %AB, %C
  %Add = fadd <4 x double> %AB, %C
  %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %subadd
}

define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3: # BB#0: # %entry
; FMA3-NEXT: vmulps %ymm1, %ymm0, %ymm0
; FMA3-NEXT: vsubps %ymm2, %ymm0, %ymm1
; FMA3-NEXT: vaddps %ymm2, %ymm0, %ymm0
; FMA3-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulps %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vsubps %ymm2, %ymm0, %ymm1
; FMA4-NEXT: vaddps %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; FMA4-NEXT: retq
entry:
  %AB = fmul <8 x float> %A, %B
  %Sub = fsub <8 x float> %AB, %C
  %Add = fadd <8 x float> %AB, %C
  %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %subadd
}

define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256: # BB#0: # %entry
; FMA3_256-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA3_256-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA3_256-NEXT: vsubpd %ymm5, %ymm1, %ymm2
; FMA3_256-NEXT: vsubpd %ymm4, %ymm0, %ymm3
; FMA3_256-NEXT: vaddpd %ymm5, %ymm1, %ymm1
; FMA3_256-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA3_256-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3]
; FMA3_256-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3]
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512: # BB#0: # %entry
; FMA3_512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; FMA3_512-NEXT: vsubpd %zmm2, %zmm0, %zmm1
; FMA3_512-NEXT: vaddpd %zmm2, %zmm0, %zmm0
; FMA3_512-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[7]
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vsubpd %ymm5, %ymm1, %ymm2
; FMA4-NEXT: vsubpd %ymm4, %ymm0, %ymm3
; FMA4-NEXT: vaddpd %ymm5, %ymm1, %ymm1
; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3]
; FMA4-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3]
; FMA4-NEXT: retq
entry:
  %AB = fmul <8 x double> %A, %B
  %Sub = fsub <8 x double> %AB, %C
  %Add = fadd <8 x double> %AB, %C
  %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %subadd
}
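
; For the 512-bit tests, subtargets without AVX-512 (FMA3_256 and FMA4)
; legalize each operation by splitting it into two 256-bit halves, which is
; why their check lines above show the mul/sub/add/blend sequence twice.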

define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256: # BB#0: # %entry
; FMA3_256-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA3_256-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA3_256-NEXT: vsubps %ymm5, %ymm1, %ymm2
; FMA3_256-NEXT: vsubps %ymm4, %ymm0, %ymm3
; FMA3_256-NEXT: vaddps %ymm5, %ymm1, %ymm1
; FMA3_256-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA3_256-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7]
; FMA3_256-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512: # BB#0: # %entry
; FMA3_512-NEXT: vmulps %zmm1, %zmm0, %zmm1
; FMA3_512-NEXT: vaddps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT: movw $-21846, %ax # imm = 0xAAAA
; FMA3_512-NEXT: kmovw %eax, %k1
; FMA3_512-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4: # BB#0: # %entry
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vsubps %ymm5, %ymm1, %ymm2
; FMA4-NEXT: vsubps %ymm4, %ymm0, %ymm3
; FMA4-NEXT: vaddps %ymm5, %ymm1, %ymm1
; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7]
; FMA4-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; FMA4-NEXT: retq
entry:
  %AB = fmul <16 x float> %A, %B
  %Sub = fsub <16 x float> %AB, %C
  %Add = fadd <16 x float> %AB, %C
  %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %subadd
}
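
; In the FMA3_512 checks for mul_subadd_ps512, the 0xAAAA write mask (binary
; 1010101010101010) merges the VSUBPS result into the odd lanes of the VADDPS
; result, so a single masked instruction replaces the blend.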

attributes #0 = { nounwind "unsafe-fp-math"="true" }
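; The "unsafe-fp-math"="true" attribute above is what allows the backend to
; contract the separate FMUL and FADD/FSUB into a single FMA-family operation
; in the first place.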