Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 1 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s |
| 2 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s |
| 3 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 4 | ; FIXME: Merge into imm.ll |
| 5 | |
; Stores a <2 x i16> splat of -32768. All targets are expected to build the
; packed 32-bit pattern 0x80008000 with one v_mov_b32 and store it as a dword.
| 6 | ; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16: |
| 7 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}} |
| 8 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 9 | define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 10 | store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out |
| 11 | ret void |
| 12 | } |
| 13 | |
; Stores a <2 x half> splat of 0.0. The value is expected to be materialized
; by v_mov_b32 with the constant 0 and written with a single dword store.
| 14 | ; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16: |
| 15 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} |
| 16 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 17 | define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 18 | store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out |
| 19 | ret void |
| 20 | } |
| 21 | |
; Stores a <2 x half> splat of -0.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0x80008000, followed by a single dword store.
| 22 | ; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16: |
| 23 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}} |
| 24 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 25 | define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 26 | store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out |
| 27 | ret void |
| 28 | } |
| 29 | |
; Stores a <2 x half> splat of 0.5. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0x38003800, followed by a single dword store.
| 30 | ; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16: |
| 31 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800{{$}} |
| 32 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 33 | define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 34 | store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out |
| 35 | ret void |
| 36 | } |
| 37 | |
; Stores a <2 x half> splat of -0.5. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0xb800b800, followed by a single dword store.
| 38 | ; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16: |
| 39 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800{{$}} |
| 40 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 41 | define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 42 | store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out |
| 43 | ret void |
| 44 | } |
| 45 | |
; Stores a <2 x half> splat of 1.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0x3c003c00, followed by a single dword store.
| 46 | ; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16: |
| 47 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}} |
| 48 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 49 | define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 50 | store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out |
| 51 | ret void |
| 52 | } |
| 53 | |
; Stores a <2 x half> splat of -1.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0xbc00bc00, followed by a single dword store.
| 54 | ; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16: |
| 55 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}} |
| 56 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 57 | define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 58 | store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out |
| 59 | ret void |
| 60 | } |
| 61 | |
; Stores a <2 x half> splat of 2.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0x40004000, followed by a single dword store.
| 62 | ; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16: |
| 63 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000{{$}} |
| 64 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 65 | define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 66 | store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out |
| 67 | ret void |
| 68 | } |
| 69 | |
; Stores a <2 x half> splat of -2.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0xc000c000, followed by a single dword store.
| 70 | ; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16: |
| 71 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000{{$}} |
| 72 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 73 | define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 74 | store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out |
| 75 | ret void |
| 76 | } |
| 77 | |
; Stores a <2 x half> splat of 4.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0x44004400, followed by a single dword store.
| 78 | ; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16: |
| 79 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400{{$}} |
| 80 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 81 | define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 82 | store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out |
| 83 | ret void |
| 84 | } |
| 85 | |
; Stores a <2 x half> splat of -4.0. The expected v_mov_b32 operand is the
; packed 32-bit pattern 0xc400c400, followed by a single dword store.
| 86 | ; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16: |
| 87 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400{{$}} |
| 88 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 89 | define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 90 | store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out |
| 91 | ret void |
| 92 | } |
| 93 | |
; Stores a <2 x half> splat of the half bit pattern 0xH3118 (the value this
; test's name calls inv_2pi). The expected v_mov_b32 operand is 0x31183118,
; followed by a single dword store.
| 94 | ; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16: |
| 95 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118{{$}} |
| 96 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 97 | define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 98 | store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out |
| 99 | ret void |
| 100 | } |
| 101 | |
; Stores a <2 x half> splat of the half bit pattern 0xHB118 (0xH3118 with the
; sign bit set). The expected v_mov_b32 operand is 0xb118b118, followed by a
; single dword store.
| 102 | ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16: |
| 103 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118{{$}} |
| 104 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 105 | define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 106 | store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out |
| 107 | ret void |
| 108 | } |
| 109 | |
; Stores a <2 x half> splat of 4096.0. The expected v_mov_b32 operand is the
; packed 32-bit literal 0x6c006c00, followed by a single dword store. The
; literal is anchored to end-of-line like the other store checks in this file
; so that a longer operand cannot match by prefix.
| 110 | ; GCN-LABEL: {{^}}store_literal_imm_v2f16: |
| 111 | ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00{{$}} |
| 112 | ; GCN: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 113 | define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 114 | store <2 x half> <half 4096.0, half 4096.0>, <2 x half> addrspace(1)* %out |
| 115 | ret void |
| 116 | } |
| 117 | |
; fadd of a <2 x half> kernel argument with a splat of 0.0.
; GFX9 is expected to use one v_pk_add_f16 with the constant 0 as an operand.
; VI is expected to split the vector: the high half is shifted out, copied to
; a VGPR and added with v_add_f16_sdwa against a v_mov'd 0, the low half is
; added with v_add_f16_e64, and the results are merged with v_or_b32.
| 118 | ; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16: |
| 119 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 160f857 | 2018-04-19 21:16:50 +0000 | [diff] [blame] | 120 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 121 | ; GFX9: buffer_store_dword [[REG]] |
| 122 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 123 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 124 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 125 | ; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 126 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 127 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 128 | |
| 129 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 130 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 131 | ; VI: v_or_b32 |
| 132 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 133 | define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 134 | %y = fadd <2 x half> %x, <half 0.0, half 0.0> |
| 135 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 136 | ret void |
| 137 | } |
| 138 | |
; fadd of a <2 x half> kernel argument with a splat of 0.5.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant 0.5 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0x3800, the low half with
; v_add_f16_e64 and inline 0.5, and the results are merged with v_or_b32.
| 139 | ; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16: |
| 140 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 141 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 142 | ; GFX9: buffer_store_dword [[REG]] |
| 143 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 144 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 145 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 146 | ; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 147 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 148 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 149 | |
| 150 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 151 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0.5 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 152 | ; VI: v_or_b32 |
| 153 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 154 | define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 155 | %y = fadd <2 x half> %x, <half 0.5, half 0.5> |
| 156 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 157 | ret void |
| 158 | } |
| 159 | |
; fadd of a <2 x half> kernel argument with a splat of -0.5.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant -0.5 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0xb800, the low half with
; v_add_f16_e64 and inline -0.5, and the results are merged with v_or_b32.
| 160 | ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16: |
| 161 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 162 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 163 | ; GFX9: buffer_store_dword [[REG]] |
| 164 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 165 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 166 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 167 | ; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 168 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 169 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 170 | |
| 171 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 172 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -0.5 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 173 | ; VI: v_or_b32 |
| 174 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 175 | define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 176 | %y = fadd <2 x half> %x, <half -0.5, half -0.5> |
| 177 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 178 | ret void |
| 179 | } |
| 180 | |
; fadd of a <2 x half> kernel argument with a splat of 1.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant 1.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0x3c00, the low half with
; v_add_f16_e64 and inline 1.0, and the results are merged with v_or_b32.
| 181 | ; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16: |
| 182 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 183 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 184 | ; GFX9: buffer_store_dword [[REG]] |
| 185 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 186 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 187 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 188 | ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 189 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 190 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 191 | |
| 192 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 193 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 194 | ; VI: v_or_b32 |
| 195 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 196 | define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 197 | %y = fadd <2 x half> %x, <half 1.0, half 1.0> |
| 198 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 199 | ret void |
| 200 | } |
| 201 | |
; fadd of a <2 x half> kernel argument with a splat of -1.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant -1.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0xbc00, the low half with
; v_add_f16_e64 and inline -1.0, and the results are merged with v_or_b32.
; The captured constant register is named CONSTM1 to follow the CONSTM*
; naming used by the other negative-constant tests in this file.
| 202 | ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16: |
| 203 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 204 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 205 | ; GFX9: buffer_store_dword [[REG]] |
| 206 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 207 | |
| 208 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 209 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
| 210 | ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 |
| 211 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 212 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 213 | |
| 214 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 215 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -1.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 216 | ; VI: v_or_b32 |
| 217 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 218 | define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 219 | %y = fadd <2 x half> %x, <half -1.0, half -1.0> |
| 220 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 221 | ret void |
| 222 | } |
| 223 | |
; fadd of a <2 x half> kernel argument with a splat of 2.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant 2.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0x4000, the low half with
; v_add_f16_e64 and inline 2.0, and the results are merged with v_or_b32.
| 224 | ; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16: |
| 225 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 226 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 227 | ; GFX9: buffer_store_dword [[REG]] |
| 228 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 229 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 230 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 231 | ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 232 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 233 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 234 | |
| 235 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 236 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 237 | ; VI: v_or_b32 |
| 238 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 239 | define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 240 | %y = fadd <2 x half> %x, <half 2.0, half 2.0> |
| 241 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 242 | ret void |
| 243 | } |
| 244 | |
; fadd of a <2 x half> kernel argument with a splat of -2.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant -2.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0xc000, the low half with
; v_add_f16_e64 and inline -2.0, and the results are merged with v_or_b32.
| 245 | ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16: |
| 246 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 247 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 248 | ; GFX9: buffer_store_dword [[REG]] |
| 249 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 250 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 251 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 252 | ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 253 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 254 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 255 | |
| 256 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 257 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -2.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 258 | ; VI: v_or_b32 |
| 259 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 260 | define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 261 | %y = fadd <2 x half> %x, <half -2.0, half -2.0> |
| 262 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 263 | ret void |
| 264 | } |
| 265 | |
; fadd of a <2 x half> kernel argument with a splat of 4.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant 4.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0x4400, the low half with
; v_add_f16_e64 and inline 4.0, and the results are merged with v_or_b32.
| 266 | ; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16: |
| 267 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 268 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 269 | ; GFX9: buffer_store_dword [[REG]] |
| 270 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 271 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 272 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 273 | ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 274 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 275 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 276 | |
| 277 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 278 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 4.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 279 | ; VI: v_or_b32 |
| 280 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 281 | define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 282 | %y = fadd <2 x half> %x, <half 4.0, half 4.0> |
| 283 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 284 | ret void |
| 285 | } |
| 286 | |
; fadd of a <2 x half> kernel argument with a splat of -4.0.
; GFX9 is expected to use one v_pk_add_f16 with the inline constant -4.0 and
; op_sel_hi:[1,0]. VI is expected to split the vector: the high half is added
; with v_add_f16_sdwa against a v_mov'd 0xc400, the low half with
; v_add_f16_e64 and inline -4.0, and the results are merged with v_or_b32.
| 287 | ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16: |
| 288 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 289 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 290 | ; GFX9: buffer_store_dword [[REG]] |
| 291 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 292 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 293 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 294 | ; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 295 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 296 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 297 | |
| 298 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 299 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -4.0 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 300 | ; VI: v_or_b32 |
| 301 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 302 | define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 303 | %y = fadd <2 x half> %x, <half -4.0, half -4.0> |
| 304 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 305 | ret void |
| 306 | } |
| 307 | |
; fadd of a <2 x half> value loaded from memory with a splat of 0.5.
; GFX9 is expected to use v_pk_add_f16 on the loaded VGPR with 0.5.
; VI is expected to add the high half with v_add_f16_sdwa (src0_sel:WORD_1)
; against a v_mov'd 0x3800, and the low half with v_add_f16_e32 where 0.5 is
; the first source operand. No instruction containing "and" may appear
; before those adds. The halves are merged with v_or_b32.
| 308 | ; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16: |
| 309 | ; GFX9: buffer_load_dword [[VAL:v[0-9]+]] |
| 310 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 |
| 311 | ; GFX9: buffer_store_dword [[REG]] |
| 312 | |
Matt Arsenault | 8c4a352 | 2018-06-26 19:10:00 +0000 | [diff] [blame] | 313 | ; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 |
| 314 | ; VI-DAG: buffer_load_dword |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 315 | ; VI-NOT: and |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 316 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 317 | ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}} |
| 318 | ; VI: v_or_b32 |
| 319 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 320 | define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 321 | %x = load <2 x half>, <2 x half> addrspace(1)* %in |
| 322 | %y = fadd <2 x half> %x, <half 0.5, half 0.5> |
| 323 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 324 | ret void |
| 325 | } |
| 326 | |
; fadd of a <2 x half> value loaded from memory with a splat of 1024.0.
; Both targets are expected to put 0x6400 in an SGPR with s_movk_i32.
; GFX9 then uses that SGPR directly in v_pk_add_f16 with op_sel_hi:[1,0].
; VI uses the SGPR directly in v_add_f16_e32 for the low half, but copies it
; to a VGPR for the v_add_f16_sdwa that handles the high half.
| 327 | ; GCN-LABEL: {{^}}commute_add_literal_v2f16: |
Matt Arsenault | 786eeea | 2017-05-17 20:00:00 +0000 | [diff] [blame] | 328 | ; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]] |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 329 | ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}} |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 330 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 331 | ; GFX9: buffer_store_dword [[REG]] |
| 332 | |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 333 | ; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 334 | ; VI-DAG: buffer_load_dword |
| 335 | ; VI-NOT: and |
Alexander Timofeev | db7ee76 | 2018-09-11 11:56:50 +0000 | [diff] [blame] | 336 | ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}} |
| 337 | ; gfx8 does not support an SGPR or immediate operand in SDWA, so the constant is first moved into a VGPR |
| 338 | ; VI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] |
| 339 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
Sam Kolton | 9fa1696 | 2017-04-06 15:03:28 +0000 | [diff] [blame] | 340 | ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 341 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 342 | define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 343 | %x = load <2 x half>, <2 x half> addrspace(1)* %in |
| 344 | %y = fadd <2 x half> %x, <half 1024.0, half 1024.0> |
| 345 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 346 | ret void |
| 347 | } |
| 348 | |
; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0xH0001. GFX9 is expected to use one v_pk_add_f16 with the integer operand
; 1 and op_sel_hi:[1,0]. VI is expected to split the vector: the high half is
; added with v_add_f16_sdwa against a v_mov'd 1, the low half with
; v_add_f16_e64 and operand 1, and the results are merged with v_or_b32.
| 349 | ; GCN-LABEL: {{^}}add_inline_imm_1_v2f16: |
| 350 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 351 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 352 | ; GFX9: buffer_store_dword [[REG]] |
| 353 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 354 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 355 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
| 356 | ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1{{$}} |
| 357 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 358 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 359 | |
| 360 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 361 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 362 | ; VI: v_or_b32 |
| 363 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 364 | define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 365 | %y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001> |
| 366 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 367 | ret void |
| 368 | } |
| 369 | |
; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0xH0002. GFX9 is expected to use one v_pk_add_f16 with the integer operand
; 2 and op_sel_hi:[1,0]. VI is expected to split the vector: the high half is
; added with v_add_f16_sdwa against a v_mov'd 2, the low half with
; v_add_f16_e64 and operand 2, and the results are merged with v_or_b32.
| 370 | ; GCN-LABEL: {{^}}add_inline_imm_2_v2f16: |
| 371 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 372 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 373 | ; GFX9: buffer_store_dword [[REG]] |
| 374 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 375 | |
| 376 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 377 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
| 378 | ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2{{$}} |
| 379 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 380 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 381 | |
| 382 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 383 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 384 | ; VI: v_or_b32 |
| 385 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 386 | define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 387 | %y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002> |
| 388 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 389 | ret void |
| 390 | } |
| 391 | |
; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0xH0010. GFX9 is expected to use one v_pk_add_f16 with the integer operand
; 16 and op_sel_hi:[1,0]. VI is expected to split the vector: the high half
; is added with v_add_f16_sdwa against a v_mov'd 16, the low half with
; v_add_f16_e64 and operand 16, and the results are merged with v_or_b32.
| 392 | ; GCN-LABEL: {{^}}add_inline_imm_16_v2f16: |
| 393 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 8b20b7d | 2018-04-17 23:09:05 +0000 | [diff] [blame] | 394 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0]{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 395 | ; GFX9: buffer_store_dword [[REG]] |
| 396 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 397 | |
| 398 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 399 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
| 400 | ; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16{{$}} |
| 401 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 402 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 403 | |
| 404 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 405 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16{{$}} |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 406 | ; VI: v_or_b32 |
| 407 | ; VI: buffer_store_dword |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 408 | define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 409 | %y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010> |
| 410 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 411 | ret void |
| 412 | } |
| 413 | |
; Integer add of -1 to the i32 bitcast of a <2 x half> kernel argument, with
; the result bitcast back and stored. Both targets are expected to do the add
; on the scalar unit with s_add_i32 and the inline constant -1, then copy the
; result to a VGPR for the store.
; The GFX9 source operand is matched with a register pattern rather than a
; fixed SGPR number so the check does not depend on register allocation.
| 414 | ; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16: |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 415 | ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1 |
| 416 | ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 417 | ; GFX9: buffer_store_dword [[REG]] |
| 418 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 419 | ; VI: s_load_dword [[VAL:s[0-9]+]] |
| 420 | ; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1{{$}} |
| 421 | ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]] |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 422 | ; VI: buffer_store_dword [[REG]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 423 | define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 424 | %xbc = bitcast <2 x half> %x to i32 |
| 425 | %y = add i32 %xbc, -1 |
| 426 | %ybc = bitcast i32 %y to <2 x half> |
| 427 | store <2 x half> %ybc, <2 x half> addrspace(1)* %out |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 428 | ret void |
| 429 | } |
| 430 | |
| 431 | ; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16: |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 432 | ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfffefffe |
| 433 | ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 434 | ; GFX9: buffer_store_dword [[REG]] |
| 435 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 436 | ; VI: s_load_dword [[VAL:s[0-9]+]] |
| 437 | ; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe{{$}} |
| 438 | ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]] |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 439 | ; VI: buffer_store_dword [[REG]] |
; Kernel under test: 32-bit integer add on the bit pattern of %x. %x is
; bitcast to i32, the constant 4294901758 (0xfffefffe, i.e. 0xfffe in each
; 16-bit half) is added, and the sum is bitcast back to <2 x half> and stored
; to %out. This is a single i32 add, not two independent 16-bit adds, so a
; carry out of the low half propagates into the high half.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 440 | define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 441 | %xbc = bitcast <2 x half> %x to i32 |
| 442 | %y = add i32 %xbc, 4294901758 ; 0xfffefffe |
| 443 | %ybc = bitcast i32 %y to <2 x half> |
| 444 | store <2 x half> %ybc, <2 x half> addrspace(1)* %out |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 445 | ret void |
| 446 | } |
| 447 | |
| 448 | ; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16: |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 449 | ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfff0fff0 |
| 450 | ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 451 | ; GFX9: buffer_store_dword [[REG]] |
| 452 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 453 | |
| 454 | ; VI: s_load_dword [[VAL:s[0-9]+]] |
| 455 | ; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0{{$}} |
| 456 | ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]] |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 457 | ; VI: buffer_store_dword [[REG]] |
; Kernel under test: 32-bit integer add on the bit pattern of %x. %x is
; bitcast to i32, the constant 4293984240 (0xfff0fff0, i.e. 0xfff0 in each
; 16-bit half) is added, and the sum is bitcast back to <2 x half> and stored
; to %out. As a single i32 add, a carry out of the low half propagates into
; the high half.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 458 | define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Sanjay Patel | dad3d13 | 2018-03-19 19:23:53 +0000 | [diff] [blame] | 459 | %xbc = bitcast <2 x half> %x to i32 |
| 460 | %y = add i32 %xbc, 4293984240 ; 0xfff0fff0 |
| 461 | %ybc = bitcast i32 %y to <2 x half> |
| 462 | store <2 x half> %ybc, <2 x half> addrspace(1)* %out |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 463 | ret void |
| 464 | } |
| 465 | |
| 466 | ; GCN-LABEL: {{^}}add_inline_imm_63_v2f16: |
| 467 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
| 468 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63 |
| 469 | ; GFX9: buffer_store_dword [[REG]] |
| 470 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 471 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 472 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 473 | ; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 474 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 475 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 476 | |
| 477 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 478 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 63 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 479 | ; VI: v_or_b32 |
| 480 | ; VI: buffer_store_dword |
; Kernel under test: per-lane fadd of the <2 x half> argument %x with the
; constant <0xH003F, 0xH003F>; the sum is stored to %out.
; 0xH003F is a raw half bit pattern (the same bits as integer 63), not the
; floating-point value 63.0.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 481 | define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 482 | %y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F> |
| 483 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 484 | ret void |
| 485 | } |
| 486 | |
| 487 | ; GCN-LABEL: {{^}}add_inline_imm_64_v2f16: |
| 488 | ; GFX9: s_load_dword [[VAL:s[0-9]+]] |
| 489 | ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64 |
| 490 | ; GFX9: buffer_store_dword [[REG]] |
| 491 | |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 492 | ; FIXME: Shouldn't need right shift and SDWA, also extra copy |
| 493 | ; VI-DAG: s_load_dword [[VAL:s[0-9]+]] |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 494 | ; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64 |
Matt Arsenault | 1349a04 | 2018-05-22 06:32:10 +0000 | [diff] [blame] | 495 | ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16 |
| 496 | ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]] |
| 497 | |
| 498 | ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| 499 | ; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 64 |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 500 | ; VI: v_or_b32 |
| 501 | ; VI: buffer_store_dword |
; Kernel under test: per-lane fadd of the <2 x half> argument %x with the
; constant <0xH0040, 0xH0040>; the sum is stored to %out.
; 0xH0040 is a raw half bit pattern (the same bits as integer 64), not the
; floating-point value 64.0.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 502 | define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { |
Matt Arsenault | eb522e6 | 2017-02-27 22:15:25 +0000 | [diff] [blame] | 503 | %y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040> |
| 504 | store <2 x half> %y, <2 x half> addrspace(1)* %out |
| 505 | ret void |
| 506 | } |
| 507 | |
; Attribute group #0, attached to the kernel definitions above through their
; trailing `#0`; it contains only `nounwind`.
| 508 | attributes #0 = { nounwind } |