; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s

; Scalar f32 -> f16 truncation.
; Under every prefix the loaded dword must feed a single v_cvt_f16_f32_e32, and
; the register that convert defines must be the one stored by
; buffer_store_short.
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptrunc_f32_to_f16(
    half addrspace(1)* %r,
    float addrspace(1)* %a) {
entry:
  %a.val = load float, float addrspace(1)* %a
  %r.val = fptrunc float %a.val to half
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar f64 -> f16 truncation.
; The checks expect a two-step lowering: v_cvt_f32_f64 on the loaded 64-bit
; register pair, then v_cvt_f16_f32 on that intermediate, whose result is
; stored with buffer_store_short.
; NOTE(review): the RUN lines pass -enable-unsafe-fp-math, which is presumably
; what permits rounding through f32 here -- confirm before changing those flags.
; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptrunc_f64_to_f16(
    half addrspace(1)* %r,
    double addrspace(1)* %a) {
entry:
  %a.val = load double, double addrspace(1)* %a
  %r.val = fptrunc double %a.val to half
  store half %r.val, half addrspace(1)* %r
  ret void
}

; <2 x float> -> <2 x half> truncation, packed into one dword.
; Element 0 is converted with v_cvt_f16_f32_e32 under every prefix; how element
; 1 is converted and merged differs per subtarget run:
;   * SI prefix   - convert, shift left by 16, then v_or_b32 with element 0.
;   * VI prefix   - SDWA convert writing WORD_1 directly, then v_or_b32.
;   * GFX9 prefix - convert, mask element 0 with 0xffff, then v_lshl_or_b32.
; The merged register must be the one stored by buffer_store_dword.
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; VI-DAG: v_cvt_f16_f32_sdwa v[[R_F16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]

; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x float> addrspace(1)* %a) {
entry:
  %a.val = load <2 x float>, <2 x float> addrspace(1)* %a
  %r.val = fptrunc <2 x float> %a.val to <2 x half>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x double> -> <2 x half> truncation, packed into one dword.
; Both f64 elements of the loaded dwordx4 are first narrowed with
; v_cvt_f32_f64 (matched through the first and last register of the loaded
; range), and element 0 is then converted with v_cvt_f16_f32_e32.
; Element 1 is handled per subtarget run:
;   * SI prefix   - convert, then shift left by 16.
;   * VI prefix   - SDWA convert writing WORD_1 directly.
;   * both of the above share the SIVI v_or_b32 merge check.
;   * GFX9 prefix - convert, mask element 0 with 0xffff, then v_lshl_or_b32.
; The merged register must be the one stored by buffer_store_dword.
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]

; SI-DAG: v_cvt_f16_f32_e32 v[[CVTHI:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[CVTHI]]

; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD

; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]

; GCN: buffer_store_dword v[[R_V2_F16]]

define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x double> addrspace(1)* %a) {
entry:
  %a.val = load <2 x double>, <2 x double> addrspace(1)* %a
  %r.val = fptrunc <2 x double> %a.val to <2 x half>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation of a negated input (negation written as fsub -0.0, x).
; The checks expect the negation to appear as the '-' source modifier on
; v_cvt_f16_f32_e64, applied to the loaded register, with the convert result
; stored by buffer_store_short.
; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
    half addrspace(1)* %r,
    float addrspace(1)* %a) {
entry:
  %a.val = load float, float addrspace(1)* %a
  %a.fneg = fsub float -0.0, %a.val
  %r.val = fptrunc float %a.fneg to half
  store half %r.val, half addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation of llvm.fabs.f32(input).
; The checks expect the absolute value to appear as the |...| source modifier
; on v_cvt_f16_f32_e64, applied to the loaded register, with the convert result
; stored by buffer_store_short.
; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
    half addrspace(1)* %r,
    float addrspace(1)* %a) {
entry:
  %a.val = load float, float addrspace(1)* %a
  %a.fabs = call float @llvm.fabs.f32(float %a.val)
  %r.val = fptrunc float %a.fabs to half
  store half %r.val, half addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation of -fabs(input) (fabs intrinsic, then fsub -0.0, x).
; The checks expect both operations to appear together as the -|...| source
; modifier on v_cvt_f16_f32_e64, applied to the loaded register, with the
; convert result stored by buffer_store_short.
; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
    half addrspace(1)* %r,
    float addrspace(1)* %a) #0 {
entry:
  %a.val = load float, float addrspace(1)* %a
  %a.fabs = call float @llvm.fabs.f32(float %a.val)
  %a.fneg.fabs = fsub float -0.0, %a.fabs
  %r.val = fptrunc float %a.fneg.fabs to half
  store half %r.val, half addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation whose bits are zero-extended to i32 and stored.
; The NOT check forbids the convert's result register from appearing anywhere
; between the convert and the store, so the register stored by
; buffer_store_dword must be the convert result itself, with no intervening
; instruction touching it.
; GCN-LABEL: {{^}}fptrunc_f32_to_f16_zext_i32:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN-NOT: v[[R_F16]]
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
    i32 addrspace(1)* %r,
    float addrspace(1)* %a) #0 {
entry:
  %a.val = load float, float addrspace(1)* %a
  %r.val = fptrunc float %a.val to half
  %r.i16 = bitcast half %r.val to i16
  %zext = zext i16 %r.i16 to i32
  store i32 %zext, i32 addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation of fabs(input), zero-extended to i32 and stored.
; The checks expect the fabs as the |...| source modifier on
; v_cvt_f16_f32_e64. The NOT check forbids the convert's result register from
; appearing between the convert and the store, so buffer_store_dword must
; store the convert result itself.
; GCN-LABEL: {{^}}fptrunc_fabs_f32_to_f16_zext_i32:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; GCN-NOT: v[[R_F16]]
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
    i32 addrspace(1)* %r,
    float addrspace(1)* %a) #0 {
entry:
  %a.val = load float, float addrspace(1)* %a
  %a.fabs = call float @llvm.fabs.f32(float %a.val)
  %r.val = fptrunc float %a.fabs to half
  %r.i16 = bitcast half %r.val to i16
  %zext = zext i16 %r.i16 to i32
  store i32 %zext, i32 addrspace(1)* %r
  ret void
}

; f32 -> f16 truncation whose bits are sign-extended to i32 and stored.
; Unlike the zero-extending variants, the checks expect an explicit
; v_bfe_i32 (offset 0, width 16) on the convert result, and the register that
; bitfield extract defines must be the one stored by buffer_store_dword.
; GCN-LABEL: {{^}}fptrunc_f32_to_f16_sext_i32:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: v_bfe_i32 v[[R_F16_SEXT:[0-9]+]], v[[R_F16]], 0, 16
; GCN: buffer_store_dword v[[R_F16_SEXT]]
define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
    i32 addrspace(1)* %r,
    float addrspace(1)* %a) #0 {
entry:
  %a.val = load float, float addrspace(1)* %a
  %r.val = fptrunc float %a.val to half
  %r.i16 = bitcast half %r.val to i16
  ; Named for the operation actually performed (sext, not zext).
  %sext = sext i16 %r.i16 to i32
  store i32 %sext, i32 addrspace(1)* %r
  ret void
}

; Intrinsic used by the fabs test kernels in this file.
declare float @llvm.fabs.f32(float) #1

; #0 is attached to the kernels that carry an explicit attribute group;
; #1 is attached to the llvm.fabs.f32 declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }