Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s |
| 2 | ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 3 | ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s |
| 4 | ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s |
| 5 | |
| | ; Baseline case: three half arguments are extended to float and fed to |
| | ; llvm.fmuladd.f32. The gfx900 / gfx906 checks expect a single |
| | ; v_mad_mix_f32 / v_fma_mix_f32 with op_sel_hi:[1,1,1] and a pinned |
| | ; encoding; fiji expects v_mac_f32 and hawaii expects v_mad_f32. |
| 6 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 7 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c] |
| 8 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 9 | ; VI: v_mac_f32 |
| 10 | ; CI: v_mad_f32 |
| 11 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 12 | %src0.ext = fpext half %src0 to float |
| 13 | %src1.ext = fpext half %src1 to float |
| 14 | %src2.ext = fpext half %src2 to float |
| 15 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 16 | ret float %result |
| 17 | } |
| 18 | |
| | ; Each source is the upper 16 bits of an i32 argument, obtained with |
| | ; lshr 16 / trunc to i16 / bitcast to half, then extended to float. |
| | ; The gfx900 / gfx906 checks expect the mix instruction with |
| | ; op_sel:[1,1,1] op_sel_hi:[1,1,1]; fiji and hawaii both expect v_mac_f32. |
| 19 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 20 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
| 21 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 22 | ; CIVI: v_mac_f32 |
| 23 | define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 { |
| 24 | %src0.hi = lshr i32 %src0, 16 |
| 25 | %src1.hi = lshr i32 %src1, 16 |
| 26 | %src2.hi = lshr i32 %src2, 16 |
| 27 | %src0.i16 = trunc i32 %src0.hi to i16 |
| 28 | %src1.i16 = trunc i32 %src1.hi to i16 |
| 29 | %src2.i16 = trunc i32 %src2.hi to i16 |
| 30 | %src0.fp16 = bitcast i16 %src0.i16 to half |
| 31 | %src1.fp16 = bitcast i16 %src1.i16 to half |
| 32 | %src2.fp16 = bitcast i16 %src2.i16 to half |
| 33 | %src0.ext = fpext half %src0.fp16 to float |
| 34 | %src1.ext = fpext half %src1.fp16 to float |
| 35 | %src2.ext = fpext half %src2.fp16 to float |
| 36 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 37 | ret float %result |
| 38 | } |
| 39 | |
| | ; Same high-half selection as the _int variant, but each source is |
| | ; element 1 extracted from a <2 x half> argument. The gfx900 / gfx906 |
| | ; checks expect op_sel:[1,1,1] op_sel_hi:[1,1,1]; fiji expects v_mac_f32 |
| | ; and hawaii expects v_mad_f32. |
| 40 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 41 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
| 42 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 43 | ; VI: v_mac_f32 |
| 44 | ; CI: v_mad_f32 |
| 45 | define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 46 | %src0.hi = extractelement <2 x half> %src0, i32 1 |
| 47 | %src1.hi = extractelement <2 x half> %src1, i32 1 |
| 48 | %src2.hi = extractelement <2 x half> %src2, i32 1 |
| 49 | %src0.ext = fpext half %src0.hi to float |
| 50 | %src1.ext = fpext half %src1.hi to float |
| 51 | %src2.ext = fpext half %src2.hi to float |
| 52 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 53 | ret float %result |
| 54 | } |
| 55 | |
| | ; Vector case: three <2 x half> arguments are extended to <2 x float> and |
| | ; fed to llvm.fmuladd.v2f32. The gfx900 / gfx906 checks expect two |
| | ; consecutive mix instructions, the first with op_sel:[1,1,1] writing v3 |
| | ; and the second without op_sel writing v0, followed by a v_mov_b32 of v3 |
| | ; into v1. fiji and hawaii only check for v_mac_f32. |
| 56 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32: |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 57 | ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] |
| 58 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] |
| 59 | ; GFX900-NEXT: v_mov_b32_e32 v1, v3 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 60 | |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 61 | ; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] |
| 62 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] |
| 63 | ; GFX906-NEXT: v_mov_b32_e32 v1, v3 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 64 | |
| 65 | ; CIVI: v_mac_f32 |
| 66 | define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 67 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 68 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 69 | %src2.ext = fpext <2 x half> %src2 to <2 x float> |
| 70 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) |
| 71 | ret <2 x float> %result |
| 72 | } |
| 73 | |
| | ; Vector case with each source shuffled before the extension: src0 uses |
| | ; mask <1, 0>, src1 uses <0, 1> and src2 uses <1, 1>. Result element 0 |
| | ; therefore reads elements (1, 0, 1) and result element 1 reads (0, 1, 1), |
| | ; which is what the expected op_sel:[1,0,1] and op_sel:[0,1,1] values on |
| | ; the two mix instructions spell out. The checks also require the |
| | ; sequence to end with a v_mov_b32 into v0 and s_setpc_b64. |
| 74 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle: |
| 75 | ; GCN: s_waitcnt |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 76 | ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] |
| 77 | ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] |
| 78 | ; GFX900-NEXT: v_mov_b32_e32 v0, v3 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 79 | ; GFX900-NEXT: s_setpc_b64 |
| 80 | |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 81 | ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] |
| 82 | ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] |
| 83 | ; GFX906-NEXT: v_mov_b32_e32 v0, v3 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 84 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 85 | |
| 86 | ; CIVI: v_mac_f32 |
| 87 | define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 88 | %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0> |
| 89 | %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1> |
| 90 | %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1> |
| 91 | %src0.ext = fpext <2 x half> %src0.shuf to <2 x float> |
| 92 | %src1.ext = fpext <2 x half> %src1.shuf to <2 x float> |
| 93 | %src2.ext = fpext <2 x half> %src2.shuf to <2 x float> |
| 94 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) |
| 95 | ret <2 x float> %result |
| 96 | } |
| 97 | |
| | ; src0 is negated after the extension to float (fsub -0.0, x). The |
| | ; gfx900 / gfx906 checks expect the negation to appear as a -v0 source |
| | ; modifier on the mix instruction, with nothing else between s_waitcnt and |
| | ; s_setpc_b64. fiji and hawaii only check for v_mad_f32. |
| 98 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 99 | ; GFX900: s_waitcnt |
| 100 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 101 | ; GFX900-NEXT: s_setpc_b64 |
| 102 | |
| 103 | ; GFX906: s_waitcnt |
| 104 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 105 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 106 | |
| 107 | ; CIVI: v_mad_f32 |
| 108 | define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 109 | %src0.ext = fpext half %src0 to float |
| 110 | %src1.ext = fpext half %src1 to float |
| 111 | %src2.ext = fpext half %src2 to float |
| 112 | %src0.ext.neg = fsub float -0.0, %src0.ext |
| 113 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext) |
| 114 | ret float %result |
| 115 | } |
| 116 | |
| | ; llvm.fabs.f32 is applied to src0 after the extension to float. The |
| | ; gfx900 / gfx906 checks expect it to appear as a |v0| source modifier on |
| | ; the mix instruction. fiji and hawaii only check for v_mad_f32. |
| 117 | ; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 118 | ; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] |
| 119 | ; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 120 | |
| 121 | ; CIVI: v_mad_f32 |
| 122 | define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 123 | %src0.ext = fpext half %src0 to float |
| 124 | %src1.ext = fpext half %src1 to float |
| 125 | %src2.ext = fpext half %src2 to float |
| 126 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 127 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) |
| 128 | ret float %result |
| 129 | } |
| 130 | |
| | ; src0 goes through llvm.fabs.f32 and is then negated (fsub -0.0, x), |
| | ; both after the extension to float. The gfx900 / gfx906 checks expect the |
| | ; combined -|v0| source modifier on the mix instruction, with nothing |
| | ; else between s_waitcnt and s_setpc_b64. fiji and hawaii only check for |
| | ; v_mad_f32. |
| 131 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 132 | ; GFX900: s_waitcnt |
| 133 | ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] |
| 134 | ; GFX900-NEXT: s_setpc_b64 |
| 135 | |
| 136 | ; GFX906: s_waitcnt |
| 137 | ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] |
| 138 | ; GFX906-NEXT: s_setpc_b64 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 139 | |
| 140 | ; CIVI: v_mad_f32 |
| 141 | define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { |
| 142 | %src0.ext = fpext half %src0 to float |
| 143 | %src1.ext = fpext half %src1 to float |
| 144 | %src2.ext = fpext half %src2 to float |
| 145 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 146 | %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs |
| 147 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext) |
| 148 | ret float %result |
| 149 | } |
| 150 | |
| | ; Mixed-precision sources: src0 and src1 are half values extended to |
| | ; float, src2 is already a float argument. The gfx900 / gfx906 checks |
| | ; expect op_sel_hi:[1,1,0] on the mix instruction, immediately followed |
| | ; by s_setpc_b64. fiji and hawaii only check for v_mad_f32. |
| 151 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32: |
| 152 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 153 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 154 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 155 | ; GFX9-NEXT: s_setpc_b64 |
| 156 | |
| 157 | ; CIVI: v_mad_f32 |
| 158 | define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 { |
| 159 | %src0.ext = fpext half %src0 to float |
| 160 | %src1.ext = fpext half %src1 to float |
| 161 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 162 | ret float %result |
| 163 | } |
| 164 | |
| | ; As the f16lo_f16lo_f32 case, but the float addend is negated |
| | ; (fsub -0.0, x) first. The gfx900 / gfx906 checks expect a -v2 source |
| | ; modifier with op_sel_hi:[1,1,0]. fiji and hawaii only check for |
| | ; v_mad_f32. |
| 165 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32: |
| 166 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 167 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding |
| 168 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 169 | ; GFX9-NEXT: s_setpc_b64 |
| 170 | |
| 171 | ; CIVI: v_mad_f32 |
| 172 | define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 { |
| 173 | %src0.ext = fpext half %src0 to float |
| 174 | %src1.ext = fpext half %src1 to float |
| 175 | %src2.neg = fsub float -0.0, %src2 |
| 176 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg) |
| 177 | ret float %result |
| 178 | } |
| 179 | |
| | ; As the f16lo_f16lo_f32 case, but llvm.fabs.f32 is applied to the float |
| | ; addend first. The gfx900 / gfx906 checks expect a |v2| source modifier |
| | ; with op_sel_hi:[1,1,0]. fiji and hawaii only check for v_mad_f32. |
| 180 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32: |
| 181 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 182 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding |
| 183 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 184 | ; GFX9-NEXT: s_setpc_b64 |
| 185 | |
| 186 | ; CIVI: v_mad_f32 |
| 187 | define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 { |
| 188 | %src0.ext = fpext half %src0 to float |
| 189 | %src1.ext = fpext half %src1 to float |
| 190 | %src2.abs = call float @llvm.fabs.f32(float %src2) |
| 191 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs) |
| 192 | ret float %result |
| 193 | } |
| 194 | |
| | ; As the f16lo_f16lo_f32 case, but the float addend goes through |
| | ; llvm.fabs.f32 and is then negated (fsub -0.0, x). The gfx900 / gfx906 |
| | ; checks expect the combined -|v2| source modifier with |
| | ; op_sel_hi:[1,1,0]. fiji and hawaii only check for v_mad_f32. |
| 195 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32: |
| 196 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 197 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding |
| 198 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 199 | ; GFX9-NEXT: s_setpc_b64 |
| 200 | |
| 201 | ; CIVI: v_mad_f32 |
| 202 | define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 { |
| 203 | %src0.ext = fpext half %src0 to float |
| 204 | %src1.ext = fpext half %src1 to float |
| 205 | %src2.abs = call float @llvm.fabs.f32(float %src2) |
| 206 | %src2.neg.abs = fsub float -0.0, %src2.abs |
| 207 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs) |
| 208 | ret float %result |
| 209 | } |
| 210 | |
| 211 | ; TODO: Fold inline immediates. Need to be careful because it is an |
| 212 | ; f16 inline immediate that may be converted to f32, not an actual f32 |
| 213 | ; inline immediate. |
| 214 | |
| | ; The addend is the float constant 1.0. The gfx900 / gfx906 checks expect |
| | ; the constant to be moved into a scalar register with s_mov_b32 and that |
| | ; register to be used as the third source of the mix instruction |
| | ; (op_sel_hi:[1,1,0]). fiji and hawaii expect v_mad_f32 with 1.0 as a |
| | ; direct operand. All targets must follow with s_setpc_b64. |
| 215 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1: |
| 216 | ; GCN: s_waitcnt |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 217 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 1.0 |
| 218 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
| 219 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 220 | |
| 221 | ; CIVI: v_mad_f32 v0, v0, v1, 1.0 |
| 222 | ; GCN-NEXT: s_setpc_b64 |
| 223 | define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { |
| 224 | %src0.ext = fpext half %src0 to float |
| 225 | %src1.ext = fpext half %src1 to float |
| 226 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0) |
| 227 | ret float %result |
| 228 | } |
| 229 | |
| | ; The addend is the float constant 0x3FC45F3060000000, which the checks |
| | ; print as 0.15915494. The gfx900 / gfx906 checks expect it to be moved |
| | ; into a scalar register and used as the third mix source; fiji expects |
| | ; v_mad_f32 with 0.15915494 as a direct operand. There is no hawaii check |
| | ; for this function. |
| 230 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: |
| 231 | ; GCN: s_waitcnt |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 232 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0.15915494 |
| 233 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
| 234 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 235 | ; VI: v_mad_f32 v0, v0, v1, 0.15915494 |
| 236 | define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 { |
| 237 | %src0.ext = fpext half %src0 to float |
| 238 | %src1.ext = fpext half %src1 to float |
| 239 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000) |
| 240 | ret float %result |
| 241 | } |
| 242 | |
| 243 | ; Attempt to break inline immediate folding. If the operand is |
| 244 | ; interpreted as f32, the inline immediate is really the f16 inline |
| 245 | ; imm value converted to f32. |
| 246 | ; fpext f16 1/2pi = 0x3e230000 |
| 247 | ; f32 1/2pi = 0x3e22f983 |
| | ; The addend here is fpext of the half constant 0xH3118, so every target |
| | ; is expected to use the literal 0x3e230000: via s_mov_b32 into a scalar |
| | ; register on gfx900 / gfx906, and via v_madak_f32 on fiji / hawaii. |
| 248 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 249 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x3e230000 |
| 250 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
| 251 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 252 | |
| 253 | ; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000 |
| 254 | define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 { |
| 255 | %src0.ext = fpext half %src0 to float |
| 256 | %src1.ext = fpext half %src1 to float |
| 257 | %src2 = fpext half 0xH3118 to float |
| 258 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 259 | ret float %result |
| 260 | } |
| 261 | |
| | ; The addend is fpext of the half constant 0xH003F. Every target is |
| | ; expected to use the literal 0x367c0000: via s_mov_b32 into a scalar |
| | ; register on gfx900 / gfx906, and via v_madak_f32 on fiji / hawaii. |
| 262 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 263 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x367c0000 |
| 264 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
| 265 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 266 | |
| 267 | ; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000 |
| 268 | define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { |
| 269 | %src0.ext = fpext half %src0 to float |
| 270 | %src1.ext = fpext half %src1 to float |
| 271 | %src2 = fpext half 0xH003F to float |
| 272 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 273 | ret float %result |
| 274 | } |
| 275 | |
| | ; Vector case with a <float 1.0, float 1.0> addend. The gfx900 / gfx906 |
| | ; checks expect one s_mov_b32 of 1.0 into a scalar register that both mix |
| | ; instructions then use as their third source: the first with |
| | ; op_sel:[1,1,0] writing v2, the second writing v0, followed by a |
| | ; v_mov_b32 of v2 into v1. There are no fiji / hawaii checks. |
| 276 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1: |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 277 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 1.0 |
| 278 | ; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 279 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 280 | ; GFX900: v_mov_b32_e32 v1, v2 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 281 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 282 | ; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 283 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 284 | ; GFX906: v_mov_b32_e32 v1, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 285 | define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 { |
| 286 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 287 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 288 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>) |
| 289 | ret <2 x float> %result |
| 290 | } |
| 291 | |
| | ; Vector case whose addend is fpext of the <2 x half> splat of 0xH3118. |
| | ; The gfx900 / gfx906 checks expect one s_mov_b32 of the literal |
| | ; 0x3e230000 into a scalar register shared by both mix instructions |
| | ; (first with op_sel:[1,1,0] writing v2, then v0), followed by a |
| | ; v_mov_b32 of v2 into v1. There are no fiji / hawaii checks. |
| 292 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi: |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 293 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x3e230000 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 294 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 295 | ; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 296 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 297 | ; GFX900: v_mov_b32_e32 v1, v2 |
| 298 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 299 | ; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 300 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 301 | ; GFX906: v_mov_b32_e32 v1, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 302 | define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { |
| 303 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 304 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 305 | %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float> |
| 306 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2) |
| 307 | ret <2 x float> %result |
| 308 | } |
| 309 | |
| | ; Vector case whose addend is the <2 x float> splat of |
| | ; 0x3FC45F3060000000, passed directly to llvm.fmuladd.v2f32; no |
| | ; half-to-float conversion of a constant is involved. The gfx900 / |
| | ; gfx906 checks expect one s_mov_b32 of 0.15915494 into a scalar register |
| | ; shared by both mix instructions (first with op_sel:[1,1,0] writing v2, |
| | ; then v0), followed by a v_mov_b32 of v2 into v1. There are no fiji / |
| | ; hawaii checks. |
| 310 | ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi: |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 311 | ; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0.15915494 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 312 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 313 | ; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 314 | ; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 315 | ; GFX900: v_mov_b32_e32 v1, v2 |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 316 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 317 | ; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding |
| 318 | ; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | 9ced1e0 | 2018-07-31 19:05:14 +0000 | [diff] [blame] | 319 | ; GFX906: v_mov_b32_e32 v1, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 320 | define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { |
| 321 | %src0.ext = fpext <2 x half> %src0 to <2 x float> |
| 322 | %src1.ext = fpext <2 x half> %src1 to <2 x float> |
| 324 | %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>) |
| 325 | ret <2 x float> %result |
| 326 | } |
| 327 | |
| | ; High-half sources (element 1 of each <2 x half> argument) whose fmuladd |
| | ; result is then bounded with llvm.maxnum.f32(x, 0.0) followed by |
| | ; llvm.minnum.f32(x, 1.0). Every target is expected to express that pair |
| | ; as a clamp modifier on the multiply-add itself: the mix instruction on |
| | ; gfx900 / gfx906, v_mac_f32_e64 on fiji and v_mad_f32 on hawaii. |
| 328 | ; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 329 | ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding |
| 330 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 331 | ; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} |
| 332 | ; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} |
| 333 | define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { |
| 334 | %src0.hi = extractelement <2 x half> %src0, i32 1 |
| 335 | %src1.hi = extractelement <2 x half> %src1, i32 1 |
| 336 | %src2.hi = extractelement <2 x half> %src2, i32 1 |
| 337 | %src0.ext = fpext half %src0.hi to float |
| 338 | %src1.ext = fpext half %src1.hi to float |
| 339 | %src2.ext = fpext half %src2.hi to float |
| 340 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 341 | %max = call float @llvm.maxnum.f32(float %result, float 0.0) |
| 342 | %clamp = call float @llvm.minnum.f32(float %max, float 1.0) |
| 343 | ret float %clamp |
| 344 | } |
| 345 | |
| | ; Negative case: all three sources are already float, so no conversion |
| | ; is involved. Every target is expected to emit a plain v_mad_f32 or |
| | ; v_fma_f32 directly between s_waitcnt and s_setpc_b64. The label is |
| | ; anchored to the start of the line, matching the other labels in this |
| | ; file. |
| 346 | ; GCN-LABEL: {{^}}no_mix_simple: |
| 347 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 348 | ; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 349 | ; GCN-NEXT: s_setpc_b64 |
| 350 | define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { |
| 351 | %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) |
| 352 | ret float %result |
| 353 | } |
| 354 | |
| | ; Negative case with a source modifier: all sources are already float |
| | ; and llvm.fabs.f32 is applied to src0. fiji, hawaii and gfx900 are |
| | ; expected to emit v_mad_f32 with |v0| as the first source; gfx906 is |
| | ; expected to emit v_fma_f32 with |v0| as the second source. The label |
| | ; is anchored to the start of the line, matching the other labels in |
| | ; this file. |
| 355 | ; GCN-LABEL: {{^}}no_mix_simple_fabs: |
| 356 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 357 | ; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2 |
| 358 | ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2 |
| 359 | ; GFX906-NEXT: v_fma_f32 v0, v1, |v0|, v2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 360 | ; GCN-NEXT: s_setpc_b64 |
| 361 | define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 { |
| 362 | %src0.fabs = call float @llvm.fabs.f32(float %src0) |
| 363 | %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2) |
| 364 | ret float %result |
| 365 | } |
| 366 | |
| 367 | ; FIXME: Should be able to select in this case. |
| 368 | ; All sources are converted from f16, so it doesn't matter that |
| 369 | ; v_mad_mix_f32 flushes. |
| | ; This function uses attribute group #1 instead of #0; the group is |
| | ; defined outside this excerpt (presumably it enables f32 denormals; |
| | ; TODO confirm). The gfx900 checks record the current output: three |
| | ; v_cvt_f32_f16 conversions and a separate v_fma_f32. No other target is |
| | ; checked for this function. |
| 370 | |
| 371 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 372 | ; GFX900: v_cvt_f32_f16 |
| 373 | ; GFX900: v_cvt_f32_f16 |
| 374 | ; GFX900: v_cvt_f32_f16 |
| 375 | ; GFX900: v_fma_f32 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 376 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 { |
| 377 | %src0.ext = fpext half %src0 to float |
| 378 | %src1.ext = fpext half %src1 to float |
| 379 | %src2.ext = fpext half %src2 to float |
| 380 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 381 | ret float %result |
| 382 | } |
| 383 | |
| | ; Two half sources plus a float addend under attribute group #1 (defined |
| | ; outside this excerpt; presumably f32 denormals enabled; TODO confirm). |
| | ; The two targets are expected to differ: gfx900 emits two v_cvt_f32_f16 |
| | ; conversions and a v_fma_f32, while gfx906 must emit no v_cvt_f32_f16 |
| | ; and a v_fma_mix_f32 with op_sel_hi:[1,1,0]. |
| 384 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 385 | ; GFX900: v_cvt_f32_f16 |
| 386 | ; GFX900: v_cvt_f32_f16 |
| 387 | ; GFX900: v_fma_f32 |
| 388 | |
| 389 | ; GFX906-NOT: v_cvt_f32_f16 |
| 390 | ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 391 | define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 { |
| 392 | %src0.ext = fpext half %src0 to float |
| 393 | %src1.ext = fpext half %src1 to float |
| 394 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) |
| 395 | ret float %result |
| 396 | } |
| 397 | |
| | ; Separate fmul and fadd (no fast-math flags, no fmuladd intrinsic) on |
| | ; three extended half sources, under attribute group #1 (defined outside |
| | ; this excerpt; presumably f32 denormals enabled; TODO confirm). Both |
| | ; gfx9 targets are expected to keep the operations apart: three |
| | ; v_cvt_f32_f16, then v_mul_f32 and v_add_f32. |
| 398 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: |
| 399 | ; GFX9: v_cvt_f32_f16 |
| 400 | ; GFX9: v_cvt_f32_f16 |
| 401 | ; GFX9: v_cvt_f32_f16 |
| 402 | ; GFX9: v_mul_f32 |
| 403 | ; GFX9: v_add_f32 |
| 404 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 { |
| 405 | %src0.ext = fpext half %src0 to float |
| 406 | %src1.ext = fpext half %src1 to float |
| 407 | %src2.ext = fpext half %src2 to float |
| 408 | %mul = fmul float %src0.ext, %src1.ext |
| 409 | %result = fadd float %mul, %src2.ext |
| 410 | ret float %result |
| 411 | } |
| 412 | |
| | ; Separate fmul and fadd (no fast-math flags) on two extended half |
| | ; sources and a float addend, under attribute group #1 (defined outside |
| | ; this excerpt; presumably f32 denormals enabled; TODO confirm). Both |
| | ; gfx9 targets are expected to emit two v_cvt_f32_f16, then v_mul_f32 |
| | ; and v_add_f32. |
| 413 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: |
| 414 | ; GFX9: v_cvt_f32_f16 |
| 415 | ; GFX9: v_cvt_f32_f16 |
| 416 | ; GFX9: v_mul_f32 |
| 417 | ; GFX9: v_add_f32 |
| 418 | define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 { |
| 419 | %src0.ext = fpext half %src0 to float |
| 420 | %src1.ext = fpext half %src1 to float |
| 421 | %mul = fmul float %src0.ext, %src1.ext |
| 422 | %result = fadd float %mul, %src2 |
| 423 | ret float %result |
| 424 | } |
| 425 | |
| | ; Separate fmul and fadd, both carrying the contract flag, on three |
| | ; extended half sources under attribute group #0. The gfx900 / gfx906 |
| | ; checks expect the pair to be combined into one mix instruction with |
| | ; op_sel_hi:[1,1,1], directly between s_waitcnt and s_setpc_b64. |
| 426 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: |
| 427 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 428 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 429 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 430 | ; GFX9-NEXT: s_setpc_b64 |
| 431 | define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 { |
| 432 | %src0.ext = fpext half %src0 to float |
| 433 | %src1.ext = fpext half %src1 to float |
| 434 | %src2.ext = fpext half %src2 to float |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 435 | %mul = fmul contract float %src0.ext, %src1.ext |
| 436 | %result = fadd contract float %mul, %src2.ext |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 437 | ret float %result |
| 438 | } |
| 439 | |
| | ; Separate fmul and fadd, both carrying the contract flag, on two |
| | ; extended half sources and a float addend under attribute group #0. The |
| | ; gfx900 / gfx906 checks expect one mix instruction with |
| | ; op_sel_hi:[1,1,0], directly between s_waitcnt and s_setpc_b64. |
| 440 | ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: |
| 441 | ; GCN: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 442 | ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
| 443 | ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 444 | ; GFX9-NEXT: s_setpc_b64 |
| 445 | define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 { |
| 446 | %src0.ext = fpext half %src0 to float |
| 447 | %src1.ext = fpext half %src1 to float |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 448 | %mul = fmul contract float %src0.ext, %src1.ext |
| 449 | %result = fadd contract float %mul, %src2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 450 | ret float %result |
| 451 | } |
| 452 | |
| | ; src0 is element 0 of an i32 argument bitcast to <2 x half>, and it is |
| | ; negated as a half (fsub half -0.0, x) before the extension to float. |
| | ; The gfx900 / gfx906 checks expect that pre-conversion negation to |
| | ; appear as a -v0 source modifier on the mix instruction, directly |
| | ; between s_waitcnt and s_setpc_b64. fiji and hawaii only check for |
| | ; v_mad_f32. |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 453 | ; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: |
| 454 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 455 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
| 456 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 457 | ; GFX9-NEXT: s_setpc_b64 |
| 458 | |
| 459 | ; CIVI: v_mad_f32 |
| 460 | define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 461 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 462 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 |
| 463 | %src0.neg = fsub half -0.0, %src0 |
| 464 | %src0.ext = fpext half %src0.neg to float |
| 465 | %src1.ext = fpext half %src1 to float |
| 466 | %src2.ext = fpext half %src2 to float |
| 467 | ; The only negation is the half one above; no float negation follows the fpext. |
| 468 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 469 | ret float %result |
| 470 | } |
| 471 | |
| 472 | ; Make sure we don't fold pre-cvt fneg if we already have a fabs |
| | ; src0 is element 1 of an i32 argument bitcast to <2 x half>; it is |
| | ; negated as a half, extended to float, and then passed through |
| | ; llvm.fabs.f32. |
| | ; NOTE(review): the only check below is that gfx900 output contains |
| | ; s_waitcnt; the expected instruction sequence is not pinned, so this |
| | ; test would not catch the fold the comment above warns about. |
| 473 | ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 474 | ; GFX900: s_waitcnt |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 475 | define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 476 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 477 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 |
| 478 | %src0.neg = fsub half -0.0, %src0 |
| 479 | %src0.ext = fpext half %src0.neg to float |
| 480 | %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) |
| 481 | %src1.ext = fpext half %src1 to float |
| 482 | %src2.ext = fpext half %src2 to float |
| 483 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) |
| 484 | ret float %result |
| 485 | } |
| 486 | |
| | ; src0 is element 1 of an i32 argument bitcast to <2 x half>, and |
| | ; llvm.fabs.f16 is applied to it before the extension to float. The |
| | ; gfx900 / gfx906 checks expect a |v0| source modifier together with |
| | ; op_sel:[1,0,0] on the mix instruction, directly between s_waitcnt and |
| | ; s_setpc_b64. |
| 487 | ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: |
| 488 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 489 | ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 490 | ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 491 | ; GFX9-NEXT: s_setpc_b64 |
| 492 | define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 493 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
| 494 | %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 |
| 495 | %src0.abs = call half @llvm.fabs.f16(half %src0) |
| 496 | %src0.ext = fpext half %src0.abs to float |
| 497 | %src1.ext = fpext half %src1 to float |
| 498 | %src2.ext = fpext half %src2 to float |
| 499 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 500 | ret float %result |
| 501 | } |
| 502 | |
; fneg applied to the whole <2 x half> vector before element 1 is extracted.
; On the gfx9 runs the body is expected to be a single mix instruction between
; s_waitcnt and s_setpc_b64, with the negate folded into a -v0 source modifier
; and op_sel [1,0,0] on src0: v_mad_mix_f32 for gfx900, v_fma_mix_f32 for
; gfx906. The fiji/hawaii runs only check the label.
| 503 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: |
| 504 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 505 | ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 506 | ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 507 | ; GFX9-NEXT: s_setpc_b64 |
| 508 | define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 509 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
; Vector fneg, written as (<-0.0, -0.0> - x), ahead of the extractelement.
| 510 | %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc |
| 511 | %src0 = extractelement <2 x half> %fneg, i32 1 |
| 512 | %src0.ext = fpext half %src0 to float |
| 513 | %src1.ext = fpext half %src1 to float |
| 514 | %src2.ext = fpext half %src2 to float |
| 515 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 516 | ret float %result |
| 517 | } |
| 518 | |
; fabs applied to the whole <2 x half> vector before element 1 is extracted.
; On the gfx9 runs the body is expected to be a single mix instruction between
; s_waitcnt and s_setpc_b64, with the fabs folded into a |v0| source modifier
; and op_sel [1,0,0] on src0: v_mad_mix_f32 for gfx900, v_fma_mix_f32 for
; gfx906. The fiji/hawaii runs only check the label.
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 519 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: |
| 520 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 521 | ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 522 | ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 523 | ; GFX9-NEXT: s_setpc_b64 |
| 524 | define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 525 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
; Vector fabs ahead of the extractelement.
| 526 | %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) |
| 527 | %src0 = extractelement <2 x half> %fabs, i32 1 |
| 528 | %src0.ext = fpext half %src0 to float |
| 529 | %src1.ext = fpext half %src1 to float |
| 530 | %src2.ext = fpext half %src2 to float |
| 531 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 532 | ret float %result |
| 533 | } |
| 534 | |
; fabs followed by fneg, both applied to the whole <2 x half> vector before
; element 1 is extracted. On the gfx9 runs the body is expected to be a single
; mix instruction between s_waitcnt and s_setpc_b64, with both operations
; folded into a -|v0| source modifier and op_sel [1,0,0] on src0:
; v_mad_mix_f32 for gfx900, v_fma_mix_f32 for gfx906. The fiji/hawaii runs
; only check the label.
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 535 | ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: |
| 536 | ; GFX9: s_waitcnt |
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 537 | ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
| 538 | ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] |
Matt Arsenault | 550c66d | 2017-10-13 20:45:49 +0000 | [diff] [blame] | 539 | ; GFX9-NEXT: s_setpc_b64 |
| 540 | define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { |
| 541 | %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> |
; Vector fabs, then vector fneg written as (<-0.0, -0.0> - x), both ahead of
; the extractelement.
| 542 | %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) |
| 543 | %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs |
| 544 | %src0 = extractelement <2 x half> %fneg.fabs, i32 1 |
| 545 | %src0.ext = fpext half %src0 to float |
| 546 | %src1.ext = fpext half %src1 to float |
| 547 | %src2.ext = fpext half %src2 to float |
| 548 | %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) |
| 549 | ret float %result |
| 550 | } |
| 551 | |
; Intrinsic declarations, all carrying attribute group #2. The functions just
; above call only the three fabs variants and llvm.fmuladd.f32; minnum, maxnum
; and the v2f32 fmuladd are presumably used by tests earlier in the file
; (not verified here).
| 552 | declare half @llvm.fabs.f16(half) #2 |
| 553 | declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 |
Matt Arsenault | d7e2303 | 2017-09-07 18:05:07 +0000 | [diff] [blame] | 554 | declare float @llvm.fabs.f32(float) #2 |
| 555 | declare float @llvm.minnum.f32(float, float) #2 |
| 556 | declare float @llvm.maxnum.f32(float, float) #2 |
| 557 | declare float @llvm.fmuladd.f32(float, float, float) #2 |
| 558 | declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2 |
| 559 | |
; Attribute groups. #0 is nounwind with the fp32-denormals target feature
; switched off ("-"); #1 is nounwind with the same feature switched on ("+");
; #2 is the group attached to the intrinsic declarations.
| 560 | attributes #0 = { nounwind "target-features"="-fp32-denormals" } |
| 561 | attributes #1 = { nounwind "target-features"="+fp32-denormals" } |
| 562 | attributes #2 = { nounwind readnone speculatable } |