Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 1 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s |
| 2 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 3 | ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s |
| 4 | ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s |
| 5 | |
| 6 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 7 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c] |
| 8 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 9 | ; VI: v_mac_f32 |
| 10 | ; CI: v_mad_f32 |
; Baseline case: all three fmuladd operands are scalar halves that are
; extended to float immediately before the call.
| 11 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 12 | %src0.ext = fpext half %src0 to float |
| 13 | %src1.ext = fpext half %src1 to float |
| 14 | %src2.ext = fpext half %src2 to float |
| 15 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 16 | ret float %result |
| 17 | } |
| 18 | |
| 19 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 20 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
| 21 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 22 | ; CIVI: v_mac_f32 |
; Each operand is the upper 16 bits of an i32 argument: shifted right by 16,
; truncated to i16, reinterpreted as half, then extended to float.
| 23 | define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 { |
| 24 | %src0.hi = lshr i32 %src0, 16 |
| 25 | %src1.hi = lshr i32 %src1, 16 |
| 26 | %src2.hi = lshr i32 %src2, 16 |
| 27 | %src0.i16 = trunc i32 %src0.hi to i16 |
| 28 | %src1.i16 = trunc i32 %src1.hi to i16 |
| 29 | %src2.i16 = trunc i32 %src2.hi to i16 |
| 30 | %src0.fp16 = bitcast i16 %src0.i16 to half |
| 31 | %src1.fp16 = bitcast i16 %src1.i16 to half |
| 32 | %src2.fp16 = bitcast i16 %src2.i16 to half |
| 33 | %src0.ext = fpext half %src0.fp16 to float |
| 34 | %src1.ext = fpext half %src1.fp16 to float |
| 35 | %src2.ext = fpext half %src2.fp16 to float |
| 36 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 37 | ret float %result |
| 38 | } |
| 39 | |
| 40 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 41 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
| 42 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 43 | ; VI: v_mac_f32 |
| 44 | ; CI: v_mad_f32 |
; Each operand is element 1 of a <2 x half> argument, extracted and then
; extended to float.
| 45 | define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 46 | %src0.hi = extractelement <2 x half> %src0, i32 1 |
| 47 | %src1.hi = extractelement <2 x half> %src1, i32 1 |
| 48 | %src2.hi = extractelement <2 x half> %src2, i32 1 |
| 49 | %src0.ext = fpext half %src0.hi to float |
| 50 | %src1.ext = fpext half %src1.hi to float |
| 51 | %src2.ext = fpext half %src2.hi to float |
| 52 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 53 | ret float %result |
| 54 | } |
| 55 | |
| 56 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 57 | ; GFX900: v_mov_b32_e32 v3, v1 |
| 58 | ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] |
| 59 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1] |
| 60 | |
| 61 | ; GFX906: v_mov_b32_e32 v3, v1 |
| 62 | ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] |
| 63 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 64 | |
| 65 | ; CIVI: v_mac_f32 |
; Vector form: three <2 x half> arguments are extended to <2 x float> and fed
; to the v2f32 fmuladd intrinsic.
| 66 | define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 67 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 68 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 69 | %src2.ext = fpext <2 x half> %src2 to <2 x float> |
| 70 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) |
| 71 | ret <2 x float> %result |
| 72 | } |
| 73 | |
| 74 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle: |
| 75 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 76 | ; GFX900-NEXT: v_mov_b32_e32 v3, v1 |
| 77 | ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] |
| 78 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] |
| 79 | ; GFX900-NEXT: s_setpc_b64 |
| 80 | |
| 81 | ; GFX906-NEXT: v_mov_b32_e32 v3, v1 |
| 82 | ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] |
| 83 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] |
| 84 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 85 | |
| 86 | ; CIVI: v_mac_f32 |
; Vector form with a shuffle ahead of each extension: src0 has its two
; elements swapped (mask 1,0), src1 keeps its order (mask 0,1), and src2
; repeats element 1 in both lanes (mask 1,1).
| 87 | define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 88 | %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0> |
| 89 | %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1> |
| 90 | %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1> |
| 91 | %src0.ext = fpext <2 x half> %src0.shuf to <2 x float> |
| 92 | %src1.ext = fpext <2 x half> %src1.shuf to <2 x float> |
| 93 | %src2.ext = fpext <2 x half> %src2.shuf to <2 x float> |
| 94 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) |
| 95 | ret <2 x float> %result |
| 96 | } |
| 97 | |
| 98 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 99 | ; GFX900: s_waitcnt |
| 100 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 101 | ; GFX900-NEXT: s_setpc_b64 |
| 102 | |
| 103 | ; GFX906: s_waitcnt |
| 104 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 105 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 106 | |
| 107 | ; CIVI: v_mad_f32 |
; src0 is negated after extension to float (fsub from -0.0); src1 and src2 are
; plain extended halves.
| 108 | define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 109 | %src0.ext = fpext half %src0 to float |
| 110 | %src1.ext = fpext half %src1 to float |
| 111 | %src2.ext = fpext half %src2 to float |
| 112 | %src0.ext.neg = fsub float -0.0, %src0.ext |
| 113 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext) |
| 114 | ret float %result |
| 115 | } |
| 116 | |
| 117 | ; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 118 | ; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] |
| 119 | ; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 120 | |
| 121 | ; CIVI: v_mad_f32 |
; src0 passes through the f32 fabs intrinsic after extension; src1 and src2
; are plain extended halves.
| 122 | define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 123 | %src0.ext = fpext half %src0 to float |
| 124 | %src1.ext = fpext half %src1 to float |
| 125 | %src2.ext = fpext half %src2 to float |
| 126 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 127 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) |
| 128 | ret float %result |
| 129 | } |
| 130 | |
| 131 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 132 | ; GFX900: s_waitcnt |
| 133 | ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] |
| 134 | ; GFX900-NEXT: s_setpc_b64 |
| 135 | |
| 136 | ; GFX906: s_waitcnt |
| 137 | ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] |
| 138 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 139 | |
| 140 | ; CIVI: v_mad_f32 |
; src0 is extended, passed through f32 fabs, and then negated (fsub from
; -0.0) before being used as the first fmuladd operand.
| 141 | define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 142 | %src0.ext = fpext half %src0 to float |
| 143 | %src1.ext = fpext half %src1 to float |
| 144 | %src2.ext = fpext half %src2 to float |
| 145 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 146 | %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs |
| 147 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext) |
| 148 | ret float %result |
| 149 | } |
| 150 | |
| 151 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32: |
| 152 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 153 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 154 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 155 | ; GFX9-NEXT: s_setpc_b64 |
| 156 | |
| 157 | ; CIVI: v_mad_f32 |
; Mixed operand types: src0 and src1 are extended halves, while src2 is
; already a float argument and is passed to fmuladd unchanged.
| 158 | define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 { |
| 159 | %src0.ext = fpext half %src0 to float |
| 160 | %src1.ext = fpext half %src1 to float |
| 161 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 162 | ret float %result |
| 163 | } |
| 164 | |
| 165 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32: |
| 166 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 167 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding |
| 168 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 169 | ; GFX9-NEXT: s_setpc_b64 |
| 170 | |
| 171 | ; CIVI: v_mad_f32 |
; Mixed operand types with the float addend negated (fsub from -0.0) before
; the fmuladd.
| 172 | define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 { |
| 173 | %src0.ext = fpext half %src0 to float |
| 174 | %src1.ext = fpext half %src1 to float |
| 175 | %src2.neg = fsub float -0.0, %src2 |
| 176 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg) |
| 177 | ret float %result |
| 178 | } |
| 179 | |
| 180 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32: |
| 181 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 182 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding |
| 183 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 184 | ; GFX9-NEXT: s_setpc_b64 |
| 185 | |
| 186 | ; CIVI: v_mad_f32 |
; Mixed operand types with the float addend passed through f32 fabs before
; the fmuladd.
| 187 | define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 { |
| 188 | %src0.ext = fpext half %src0 to float |
| 189 | %src1.ext = fpext half %src1 to float |
| 190 | %src2.abs = call float @llvm.fabs.f32(float %src2) |
| 191 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs) |
| 192 | ret float %result |
| 193 | } |
| 194 | |
| 195 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32: |
| 196 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 197 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding |
| 198 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 199 | ; GFX9-NEXT: s_setpc_b64 |
| 200 | |
| 201 | ; CIVI: v_mad_f32 |
; Mixed operand types with the float addend passed through f32 fabs and then
; negated (fsub from -0.0) before the fmuladd.
| 202 | define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 { |
| 203 | %src0.ext = fpext half %src0 to float |
| 204 | %src1.ext = fpext half %src1 to float |
| 205 | %src2.abs = call float @llvm.fabs.f32(float %src2) |
| 206 | %src2.neg.abs = fsub float -0.0, %src2.abs |
| 207 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs) |
| 208 | ret float %result |
| 209 | } |
| 210 | |
| 211 | ; TODO: Fold inline immediates. Need to be careful because it is an |
| 212 | ; f16 inline immediate that may be converted to f32, not an actual f32 |
| 213 | ; inline immediate. |
| 214 | |
| 215 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1: |
| 216 | ; GCN: s_waitcnt |
| 217 | ; GFX9: v_mov_b32_e32 v2, 1.0 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 218 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 219 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 220 | |
| 221 | ; CIVI: v_mad_f32 v0, v0, v1, 1.0 |
| 222 | ; GCN-NEXT: s_setpc_b64 |
; The addend is the f32 constant 1.0; src0 and src1 are extended halves.
| 223 | define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { |
| 224 | %src0.ext = fpext half %src0 to float |
| 225 | %src1.ext = fpext half %src1 to float |
| 226 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0) |
| 227 | ret float %result |
| 228 | } |
| 229 | |
| 230 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: |
| 231 | ; GCN: s_waitcnt |
| 232 | ; GFX9: v_mov_b32_e32 v2, 0.15915494 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 233 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 234 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 235 | ; VI: v_mad_f32 v0, v0, v1, 0.15915494 |
; The addend is the f32 constant 0x3FC45F3060000000, which the check lines for
; this function print as 0.15915494; src0 and src1 are extended halves.
| 236 | define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 { |
| 237 | %src0.ext = fpext half %src0 to float |
| 238 | %src1.ext = fpext half %src1 to float |
| 239 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000) |
| 240 | ret float %result |
| 241 | } |
| 242 | |
| 243 | ; Attempt to break inline immediate folding. If the operand is |
| 244 | ; interpreted as f32, the inline immediate is really the f16 inline |
| 245 | ; imm value converted to f32. |
| 246 | ; fpext f16 1/2pi = 0x3e230000 |
| 247 | ; f32 1/2pi = 0x3e22f983 |
| 248 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: |
| 249 | ; GFX9: v_mov_b32_e32 v2, 0x3e230000 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 250 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 251 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 252 | |
| 253 | ; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000 |
; The addend is a constant expression of the form fpext(half 0xH3118), i.e. a
; half constant widened to float rather than an f32 literal.
| 254 | define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 { |
| 255 | %src0.ext = fpext half %src0 to float |
| 256 | %src1.ext = fpext half %src1 to float |
| 257 | %src2 = fpext half 0xH3118 to float |
| 258 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 259 | ret float %result |
| 260 | } |
| 261 | |
| 262 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: |
| 263 | ; GFX9: v_mov_b32_e32 v2, 0x367c0000 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 264 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 265 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 266 | |
| 267 | ; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000 |
; The addend is fpext(half 0xH003F): the half whose bit pattern is the integer
; 63, widened to float. The check lines expect the value 0x367c0000.
| 268 | define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { |
| 269 | %src0.ext = fpext half %src0 to float |
| 270 | %src1.ext = fpext half %src1 to float |
| 271 | %src2 = fpext half 0xH003F to float |
| 272 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 273 | ret float %result |
| 274 | } |
| 275 | |
| 276 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1: |
| 277 | ; GFX9: v_mov_b32_e32 v2, v1 |
| 278 | ; GFX9: v_mov_b32_e32 v3, 1.0 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 279 | ; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 280 | ; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
| 281 | |
| 282 | ; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 283 | ; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
; Vector form whose addend is the f32 splat constant <1.0, 1.0>.
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 284 | define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 { |
| 285 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 286 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 287 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>) |
| 288 | ret <2 x float> %result |
| 289 | } |
| 290 | |
| 291 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi: |
| 292 | ; GFX9: v_mov_b32_e32 v2, v1 |
| 293 | ; GFX9: v_mov_b32_e32 v3, 0x3e230000 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 294 | ; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 295 | ; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
| 296 | |
| 297 | ; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 298 | ; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
; Vector form whose addend is a <2 x half> splat of 0xH3118 widened to
; <2 x float> by fpext.
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 299 | define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { |
| 300 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 301 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 302 | %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float> |
| 303 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2) |
| 304 | ret <2 x float> %result |
| 305 | } |
| 306 | |
| 307 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi: |
| 308 | ; GFX9: v_mov_b32_e32 v2, v1 |
| 309 | ; GFX9: v_mov_b32_e32 v3, 0.15915494 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 310 | |
| 311 | ; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 312 | ; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
| 313 | |
| 314 | ; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 315 | ; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding |
; Vector form whose addend is an f32 splat of 0x3FC45F3060000000 passed
; directly to fmuladd; only src0 and src1 are extended from <2 x half>.
; The previously present, never-used %src2 fpext of a half splat was dropped:
; it was dead and did not feed the fmuladd.
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 316 | define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { |
| 317 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 318 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 320 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>) |
| 321 | ret <2 x float> %result |
| 322 | } |
| 323 | |
| 324 | ; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 325 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding |
| 326 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 327 | ; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} |
| 328 | ; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} |
; Same operand setup as the element-1 extract test, but the fmuladd result is
; then bounded with maxnum(result, 0.0) followed by minnum(max, 1.0).
| 329 | define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 330 | %src0.hi = extractelement <2 x half> %src0, i32 1 |
| 331 | %src1.hi = extractelement <2 x half> %src1, i32 1 |
| 332 | %src2.hi = extractelement <2 x half> %src2, i32 1 |
| 333 | %src0.ext = fpext half %src0.hi to float |
| 334 | %src1.ext = fpext half %src1.hi to float |
| 335 | %src2.ext = fpext half %src2.hi to float |
| 336 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 337 | %max = call float @llvm.maxnum.f32(float %result, float 0.0) |
| 338 | %clamp = call float @llvm.minnum.f32(float %max, float 1.0) |
| 339 | ret float %clamp |
| 340 | } |
| 341 | |
; Control case: all three operands are already float, so there is no half
; extension involved. The label check is anchored to the start of the line,
; matching every other label check in this file.
| 342 | ; GCN-LABEL: {{^}}no_mix_simple: |
| 343 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 344 | ; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 345 | ; GCN-NEXT: s_setpc_b64 |
| 346 | define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { |
| 347 | %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) |
| 348 | ret float %result |
| 349 | } |
| 350 | |
; Control case with a source modifier: all operands are already float and
; src0 goes through f32 fabs before the fmuladd. The label check is anchored
; to the start of the line, matching every other label check in this file.
| 351 | ; GCN-LABEL: {{^}}no_mix_simple_fabs: |
| 352 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 353 | ; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2 |
| 354 | ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2 |
| 355 | ; GFX906-NEXT: v_fma_f32 v0, v1, |v0|, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 356 | ; GCN-NEXT: s_setpc_b64 |
| 357 | define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 { |
| 358 | %src0.fabs = call float @llvm.fabs.f32(float %src0) |
| 359 | %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2) |
| 360 | ret float %result |
| 361 | } |
| 362 | |
| 363 | ; FIXME: Should be able to select in this case. |
| 364 | ; All sources are converted from f16, so it doesn't matter that |
| 365 | ; v_mad_mix_f32 flushes denormals. |
| 366 | |
| 367 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 368 | ; GFX900: v_cvt_f32_f16 |
| 369 | ; GFX900: v_cvt_f32_f16 |
| 370 | ; GFX900: v_cvt_f32_f16 |
| 371 | ; GFX900: v_fma_f32 |
; Same IR as the baseline three-half test, but compiled with attribute group
; #1 (the "+fp32-denormals" target feature) instead of #0.
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 372 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 { |
| 373 | %src0.ext = fpext half %src0 to float |
| 374 | %src1.ext = fpext half %src1 to float |
| 375 | %src2.ext = fpext half %src2 to float |
| 376 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 377 | ret float %result |
| 378 | } |
| 379 | |
| 380 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 381 | ; GFX900: v_cvt_f32_f16 |
| 382 | ; GFX900: v_cvt_f32_f16 |
| 383 | ; GFX900: v_fma_f32 |
| 384 | |
| 385 | ; GFX906-NOT: v_cvt_f32_f16 |
| 386 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] |
; Two extended halves plus a float addend, compiled with attribute group #1
; (the "+fp32-denormals" target feature).
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 387 | define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 { |
| 388 | %src0.ext = fpext half %src0 to float |
| 389 | %src1.ext = fpext half %src1 to float |
| 390 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 391 | ret float %result |
| 392 | } |
| 393 | |
| 394 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: |
| 395 | ; GFX9: v_cvt_f32_f16 |
| 396 | ; GFX9: v_cvt_f32_f16 |
| 397 | ; GFX9: v_cvt_f32_f16 |
| 398 | ; GFX9: v_mul_f32 |
| 399 | ; GFX9: v_add_f32 |
; Uses a separate fmul and fadd (no fast-math flags) instead of the fmuladd
; intrinsic, on three extended halves, with attribute group #1.
| 400 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 { |
| 401 | %src0.ext = fpext half %src0 to float |
| 402 | %src1.ext = fpext half %src1 to float |
| 403 | %src2.ext = fpext half %src2 to float |
| 404 | %mul = fmul float %src0.ext, %src1.ext |
| 405 | %result = fadd float %mul, %src2.ext |
| 406 | ret float %result |
| 407 | } |
| 408 | |
| 409 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: |
| 410 | ; GFX9: v_cvt_f32_f16 |
| 411 | ; GFX9: v_cvt_f32_f16 |
| 412 | ; GFX9: v_mul_f32 |
| 413 | ; GFX9: v_add_f32 |
; Separate fmul and fadd (no fast-math flags) on two extended halves and a
; float addend, with attribute group #1.
| 414 | define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 { |
| 415 | %src0.ext = fpext half %src0 to float |
| 416 | %src1.ext = fpext half %src1 to float |
| 417 | %mul = fmul float %src0.ext, %src1.ext |
| 418 | %result = fadd float %mul, %src2 |
| 419 | ret float %result |
| 420 | } |
| 421 | |
| 422 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: |
| 423 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 424 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 425 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 426 | ; GFX9-NEXT: s_setpc_b64 |
; Separate fmul and fadd, both carrying the `contract` flag, on three extended
; halves, with attribute group #0 ("-fp32-denormals").
| 427 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 { |
| 428 | %src0.ext = fpext half %src0 to float |
| 429 | %src1.ext = fpext half %src1 to float |
| 430 | %src2.ext = fpext half %src2 to float |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 431 | %mul = fmul contract float %src0.ext, %src1.ext |
| 432 | %result = fadd contract float %mul, %src2.ext |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 433 | ret float %result |
| 434 | } |
| 435 | |
| 436 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: |
| 437 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 438 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 439 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 440 | ; GFX9-NEXT: s_setpc_b64 |
; Separate fmul and fadd, both carrying the `contract` flag, on two extended
; halves and a float addend, with attribute group #0 ("-fp32-denormals").
| 441 | define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 { |
| 442 | %src0.ext = fpext half %src0 to float |
| 443 | %src1.ext = fpext half %src1 to float |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 444 | %mul = fmul contract float %src0.ext, %src1.ext |
| 445 | %result = fadd contract float %mul, %src2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 446 | ret float %result |
| 447 | } |
| 448 | |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 449 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: |
| 450 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 451 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 452 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 453 | ; GFX9-NEXT: s_setpc_b64 |
| 454 | |
| 455 | ; CIVI: v_mad_f32 |
; src0 is element 0 of an i32 argument bitcast to <2 x half>; it is negated
; while still a half (fsub half -0.0), and only then extended to float.
; The commented-out line in the body is a disabled post-extension negate.
| 456 | define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 457 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 458 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 |
| 459 | %src0.neg = fsub half -0.0, %src0 |
| 460 | %src0.ext = fpext half %src0.neg to float |
| 461 | %src1.ext = fpext half %src1 to float |
| 462 | %src2.ext = fpext half %src2 to float |
| 463 | ; %src0.ext.neg = fsub float -0.0, %src0.ext |
| 464 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 465 | ret float %result |
| 466 | } |
| 467 | |
| 468 | ; Make sure we don't fold pre-cvt fneg if we already have a fabs |
| 469 | ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 470 | ; GFX900: s_waitcnt |
; src0 is element 1 of an i32 argument bitcast to <2 x half>; it is negated as
; a half, extended to float, and then passed through the f32 fabs.
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 471 | define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 472 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 473 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 |
| 474 | %src0.neg = fsub half -0.0, %src0 |
| 475 | %src0.ext = fpext half %src0.neg to float |
| 476 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 477 | %src1.ext = fpext half %src1 to float |
| 478 | %src2.ext = fpext half %src2 to float |
| 479 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) |
| 480 | ret float %result |
| 481 | } |
| 482 | |
| 483 | ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: |
| 484 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 485 | ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 486 | ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 487 | ; GFX9-NEXT: s_setpc_b64 |
; src0 is element 1 of an i32 argument bitcast to <2 x half>; the f16 fabs is
; applied to it before the extension to float.
| 488 | define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 489 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 490 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 |
| 491 | %src0.abs = call half @llvm.fabs.f16(half %src0) |
| 492 | %src0.ext = fpext half %src0.abs to float |
| 493 | %src1.ext = fpext half %src1 to float |
| 494 | %src2.ext = fpext half %src2 to float |
| 495 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 496 | ret float %result |
| 497 | } |
| 498 | |
| 499 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: |
| 500 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 501 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 502 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 503 | ; GFX9-NEXT: s_setpc_b64 |
; The negate is applied to the whole <2 x half> vector (fsub from a -0.0
; splat) before element 1 is extracted and extended to float.
| 504 | define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 505 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 506 | %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc |
| 507 | %src0 = extractelement <2 x half> %fneg, i32 1 |
| 508 | %src0.ext = fpext half %src0 to float |
| 509 | %src1.ext = fpext half %src1 to float |
| 510 | %src2.ext = fpext half %src2 to float |
| 511 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 512 | ret float %result |
| 513 | } |
| 514 | |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 515 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: |
| 516 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 517 | ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 518 | ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 519 | ; GFX9-NEXT: s_setpc_b64 |
; The v2f16 fabs is applied to the whole <2 x half> vector before element 1 is
; extracted and extended to float.
| 520 | define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 521 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 522 | %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) |
| 523 | %src0 = extractelement <2 x half> %fabs, i32 1 |
| 524 | %src0.ext = fpext half %src0 to float |
| 525 | %src1.ext = fpext half %src1 to float |
| 526 | %src2.ext = fpext half %src2 to float |
| 527 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 528 | ret float %result |
| 529 | } |
| 530 | |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 531 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: |
| 532 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame^] | 533 | ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 534 | ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 535 | ; GFX9-NEXT: s_setpc_b64 |
; The v2f16 fabs followed by a vector negate (fsub from a -0.0 splat) is
; applied to the whole <2 x half> vector before element 1 is extracted and
; extended to float.
| 536 | define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 537 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 538 | %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) |
| 539 | %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs |
| 540 | %src0 = extractelement <2 x half> %fneg.fabs, i32 1 |
| 541 | %src0.ext = fpext half %src0 to float |
| 542 | %src1.ext = fpext half %src1 to float |
| 543 | %src2.ext = fpext half %src2 to float |
| 544 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 545 | ret float %result |
| 546 | } |
| 547 | |
; Declarations of the intrinsics called by the functions in this file; each
; one is tagged with attribute group #2.
| 548 | declare half @llvm.fabs.f16(half) #2 |
| 549 | declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 550 | declare float @llvm.fabs.f32(float) #2 |
| 551 | declare float @llvm.minnum.f32(float, float) #2 |
| 552 | declare float @llvm.maxnum.f32(float, float) #2 |
| 553 | declare float @llvm.fmuladd.f32(float, float, float) #2 |
| 554 | declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2 |
| 555 | |
; Attribute groups: #0 turns the fp32-denormals target feature off, #1 turns
; it on, and #2 is the group applied to the intrinsic declarations.
| 556 | attributes #0 = { nounwind "target-features"="-fp32-denormals" } |
| 557 | attributes #1 = { nounwind "target-features"="+fp32-denormals" } |
| 558 | attributes #2 = { nounwind readnone speculatable } |