; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; FIXME: Merge into imm.ll

; Stores a <2 x i16> splat of -32768. The checks expect one v_mov_b32 of the
; packed value 0x80008000 followed by a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
  store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 0.0. The checks expect a v_mov_b32 of 0 and a
; single dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of -0.0. The checks expect a v_mov_b32 of the
; packed value 0x80008000 and a single dword store.
; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 0.5. The checks expect a v_mov_b32 of the
; packed value 0x38003800 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x38003800{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of -0.5. The checks expect a v_mov_b32 of the
; packed value 0xb800b800 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0xb800b800{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 1.0. The checks expect a v_mov_b32 of the
; packed value 0x3c003c00 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of -1.0. The checks expect a v_mov_b32 of the
; packed value 0xbc00bc00 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 2.0. The checks expect a v_mov_b32 of the
; packed value 0x40004000 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x40004000{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of -2.0. The checks expect a v_mov_b32 of the
; packed value 0xc000c000 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0xc000c000{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 4.0. The checks expect a v_mov_b32 of the
; packed value 0x44004400 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x44004400{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of -4.0. The checks expect a v_mov_b32 of the
; packed value 0xc400c400 and a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0xc400c400{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of the half with bit pattern 0xH3118 (the value the
; test name calls inv_2pi). The checks expect a v_mov_b32 of 0x31183118 and a
; single dword store.
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0x31183118{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of the half with bit pattern 0xHB118 (0xH3118 with
; the sign bit set). The checks expect a v_mov_b32 of 0xb118b118 and a single
; dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[IMM:v[0-9]+]], 0xb118b118{{$}}
; GCN: buffer_store_dword [[IMM]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <2 x half> splat of 4096.0. The checks expect a v_mov_b32 of the
; packed literal 0x6c006c00 and a single dword store. The literal is anchored
; at end of line, like the other store tests, so a longer constant that merely
; starts with these digits cannot match.
; GCN-LABEL: {{^}}store_literal_imm_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4096.0, half 4096.0>, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of 0.0.
; gfx900 checks expect one v_pk_add_f16 with the constant 0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 0{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0.0, half 0.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of 0.5.
; gfx900 checks expect one v_pk_add_f16 with 0.5 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 0.5 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of -0.5.
; gfx900 checks expect one v_pk_add_f16 with -0.5 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], -0.5 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], -0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half -0.5, half -0.5>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of 1.0.
; gfx900 checks expect one v_pk_add_f16 with 1.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 1.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 1.0, half 1.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of -1.0.
; gfx900 checks expect one v_pk_add_f16 with -1.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], -1.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32. The 0xbc00 constant is
; captured as CONSTM1, matching the CONSTM* names used for the other negative
; constants in this file.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], -1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half -1.0, half -1.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of 2.0.
; gfx900 checks expect one v_pk_add_f16 with 2.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 2.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 2.0, half 2.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of -2.0.
; gfx900 checks expect one v_pk_add_f16 with -2.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], -2.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], -2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half -2.0, half -2.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of 4.0.
; gfx900 checks expect one v_pk_add_f16 with 4.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 4.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 4.0, half 4.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of -4.0.
; gfx900 checks expect one v_pk_add_f16 with -4.0 as an operand.
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], -4.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], -4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half -4.0, half -4.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> value loaded from memory with a splat of 0.5.
; gfx900 checks expect one v_pk_add_f16 on the loaded VGPR with 0.5.
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
; GFX9: buffer_load_dword [[LOADED:v[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[LOADED]], 0.5
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect an SDWA add for the high half (reading WORD_1 of the
; source directly), an e32 add with 0.5 as the first source for the low half,
; no "and" in between, and a final v_or_b32 merge.
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %sum = fadd <2 x half> %val, <half 0.5, half 0.5>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> value loaded from memory with a splat of 1024.0.
; gfx900 checks expect the constant 0x6400 in an SGPR, used directly as the
; second operand of v_pk_add_f16.
; GCN-LABEL: {{^}}commute_add_literal_v2f16:
; GFX9-DAG: buffer_load_dword [[LOADED:v[0-9]+]]
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[LOADED]], [[K]] op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
; gfx8 does not support an SGPR or immediate operand in SDWA, so the constant
; is first moved into a VGPR.
; VI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %sum = fadd <2 x half> %val, <half 1024.0, half 1024.0>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half whose bit
; pattern is 0xH0001. The checks expect that constant to be printed as the
; integer 1.
; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 1 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1{{$}}
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 1{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half whose bit
; pattern is 0xH0002. The checks expect that constant to be printed as the
; integer 2.
; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 2 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2{{$}}
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 2{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half whose bit
; pattern is 0xH0010. The checks expect that constant to be printed as the
; integer 16.
; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 16 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16{{$}}
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 16{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the name, this kernel performs an integer add: the <2 x half>
; argument is bitcast to i32, -1 is added, and the result is bitcast back and
; stored. The checks expect a scalar s_add_i32 whose result is copied to a
; VGPR for the store.
;
; The gfx900 source operand is matched with a register pattern rather than a
; fixed SGPR number so the check does not depend on register allocation.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
; GFX9: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, -1
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the name, this kernel performs an integer add: the <2 x half>
; argument is bitcast to i32, 0xfffefffe is added, and the result is bitcast
; back and stored. The checks expect a scalar s_add_i32 with that literal,
; whose result is copied to a VGPR for the store.
;
; The gfx900 source operand is matched with a register pattern rather than a
; fixed SGPR number so the check does not depend on register allocation.
; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
; GFX9: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4294901758 ; 0xfffefffe
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the name, this kernel performs an integer add: the <2 x half>
; argument is bitcast to i32, 0xfff0fff0 is added, and the result is bitcast
; back and stored. The checks expect a scalar s_add_i32 with that literal,
; whose result is copied to a VGPR for the store.
;
; The gfx900 source operand is matched with a register pattern rather than a
; fixed SGPR number so the check does not depend on register allocation.
; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
; GFX9: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4293984240 ; 0xfff0fff0
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half whose bit
; pattern is 0xH003F. The checks expect that constant to be printed as the
; integer 63.
; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 63
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 63
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half whose bit
; pattern is 0xH0040. The checks expect that constant to be printed as the
; integer 64.
; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
; GFX9: s_load_dword [[X:s[0-9]+]]
; GFX9: v_pk_add_f16 [[SUM:v[0-9]+]], [[X]], 64
; GFX9: buffer_store_dword [[SUM]]

; fiji checks expect the two halves to be added separately (SDWA for the high
; half, e64 for the low half) and merged with v_or_b32.
; FIXME: The right shift, the SDWA form and the extra copy should not be needed.
; VI-DAG: s_load_dword [[X:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
; VI-DAG: s_lshr_b32 [[HI:s[0-9]+]], [[X]], 16
; VI-DAG: v_mov_b32_e32 [[V_HI:v[0-9]+]], [[HI]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_HI]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[X]], 64
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %sum = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }