; Codegen checks for packed 16-bit vector immediates (<2 x i16> and
; <2 x half>) compiled for gfx900, fiji and kaveri.
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; FIXME: Merge into imm.ll

; Storing a <2 x i16> splat of -32768 (0x8000) is expected to emit the packed
; 32-bit literal 0x80008000 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
  store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 0.0 is expected to move the constant 0 into a
; VGPR and write it with one dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of -0.0 is expected to emit the packed 32-bit
; literal 0x80008000 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 0.5 is expected to emit the packed 32-bit
; literal 0x38003800 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of -0.5 is expected to emit the packed 32-bit
; literal 0xb800b800 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 1.0 is expected to emit the packed 32-bit
; literal 0x3c003c00 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of -1.0 is expected to emit the packed 32-bit
; literal 0xbc00bc00 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 2.0 is expected to emit the packed 32-bit
; literal 0x40004000 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of -2.0 is expected to emit the packed 32-bit
; literal 0xc000c000 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 4.0 is expected to emit the packed 32-bit
; literal 0x44004400 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of -4.0 is expected to emit the packed 32-bit
; literal 0xc400c400 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of the bit pattern 0x3118 (called inv_2pi by the
; test name) is expected to emit the packed 32-bit literal 0x31183118 through
; v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of the bit pattern 0xB118 (the sign-flipped
; counterpart of 0x3118) is expected to emit the packed 32-bit literal
; 0xb118b118 through v_mov_b32 followed by one dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
  ret void
}

; Storing a <2 x half> splat of 4096.0 is expected to emit the packed 32-bit
; literal 0x6c006c00 through v_mov_b32 followed by one dword store. The
; literal is anchored to end of line like the other store checks in this file.
; GCN-LABEL: {{^}}store_literal_imm_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4096.0, half 4096.0>, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a 0.0 splat. gfx900 is expected to
; use a single v_pk_add_f16 with the constant 0. fiji is expected to add the
; two halves separately (an SDWA add writing WORD_1 with the constant held in
; a VGPR, plus a v_add_f16_e64 with the constant 0) and merge them with
; v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.0, half 0.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a 0.5 splat. gfx900 is expected to
; use a single v_pk_add_f16 with the constant 0.5 and op_sel_hi:[1,0]. fiji is
; expected to add the two halves separately (an SDWA add writing WORD_1 with
; 0x3800 held in a VGPR, plus a v_add_f16_e64 with the constant 0.5) and merge
; them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a -0.5 splat. gfx900 is expected
; to use a single v_pk_add_f16 with the constant -0.5 and op_sel_hi:[1,0].
; fiji is expected to add the two halves separately (an SDWA add writing
; WORD_1 with 0xb800 held in a VGPR, plus a v_add_f16_e64 with the constant
; -0.5) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -0.5, half -0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a 1.0 splat. gfx900 is expected to
; use a single v_pk_add_f16 with the constant 1.0 and op_sel_hi:[1,0]. fiji is
; expected to add the two halves separately (an SDWA add writing WORD_1 with
; 0x3c00 held in a VGPR, plus a v_add_f16_e64 with the constant 1.0) and merge
; them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 1.0, half 1.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a -1.0 splat. gfx900 is expected
; to use a single v_pk_add_f16 with the constant -1.0 and op_sel_hi:[1,0].
; fiji is expected to add the two halves separately (an SDWA add writing
; WORD_1 with 0xbc00 held in a VGPR, plus a v_add_f16_e64 with the constant
; -1.0) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -1.0, half -1.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a 2.0 splat. gfx900 is expected to
; use a single v_pk_add_f16 with the constant 2.0 and op_sel_hi:[1,0]. fiji is
; expected to add the two halves separately (an SDWA add writing WORD_1 with
; 0x4000 held in a VGPR, plus a v_add_f16_e64 with the constant 2.0) and merge
; them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 2.0, half 2.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a -2.0 splat. gfx900 is expected
; to use a single v_pk_add_f16 with the constant -2.0 and op_sel_hi:[1,0].
; fiji is expected to add the two halves separately (an SDWA add writing
; WORD_1 with 0xc000 held in a VGPR, plus a v_add_f16_e64 with the constant
; -2.0) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -2.0, half -2.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a 4.0 splat. gfx900 is expected to
; use a single v_pk_add_f16 with the constant 4.0 and op_sel_hi:[1,0]. fiji is
; expected to add the two halves separately (an SDWA add writing WORD_1 with
; 0x4400 held in a VGPR, plus a v_add_f16_e64 with the constant 4.0) and merge
; them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 4.0, half 4.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a -4.0 splat. gfx900 is expected
; to use a single v_pk_add_f16 with the constant -4.0 and op_sel_hi:[1,0].
; fiji is expected to add the two halves separately (an SDWA add writing
; WORD_1 with 0xc400 held in a VGPR, plus a v_add_f16_e64 with the constant
; -4.0) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -4.0, half -4.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> loaded from memory with a 0.5 splat. gfx900 is expected
; to use a single v_pk_add_f16 on the loaded VGPR with the constant 0.5. fiji
; is expected to avoid any "and" instruction, add the high half with an SDWA
; add reading WORD_1 (0x3800 held in a VGPR), add the low half with
; v_add_f16_e32 taking 0.5 as the first source, and merge with v_or_b32.
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_dword
; VI-NOT: and
; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %x = load <2 x half>, <2 x half> addrspace(1)* %in
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

327; GCN-LABEL: {{^}}commute_add_literal_v2f16:
Matt Arsenault786eeea2017-05-17 20:00:00 +0000328; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
329; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
Stanislav Mekhanoshin56ea4882017-05-30 16:49:24 +0000330; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}}
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000331; GFX9: buffer_store_dword [[REG]]
332
333; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
334; VI-DAG: buffer_load_dword
335; VI-NOT: and
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000336; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
Stanislav Mekhanoshin03306602017-06-03 17:39:47 +0000337; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
Sam Kolton9fa16962017-04-06 15:03:28 +0000338; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000339; VI: buffer_store_dword
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000340define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000341 %x = load <2 x half>, <2 x half> addrspace(1)* %in
342 %y = fadd <2 x half> %x, <half 1024.0, half 1024.0>
343 store <2 x half> %y, <2 x half> addrspace(1)* %out
344 ret void
345}
346
; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0x0001, which the checks expect to be printed as the integer constant 1.
; gfx900 is expected to use one v_pk_add_f16 with op_sel_hi:[1,0]; fiji is
; expected to add the halves separately (SDWA add with the constant in a VGPR
; plus v_add_f16_e64) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1{{$}}
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0x0002, which the checks expect to be printed as the integer constant 2.
; gfx900 is expected to use one v_pk_add_f16 with op_sel_hi:[1,0]; fiji is
; expected to add the halves separately (SDWA add with the constant in a VGPR
; plus v_add_f16_e64) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2{{$}}
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0x0010, which the checks expect to be printed as the integer constant 16.
; gfx900 is expected to use one v_pk_add_f16 with op_sel_hi:[1,0]; fiji is
; expected to add the halves separately (SDWA add with the constant in a VGPR
; plus v_add_f16_e64) and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16{{$}}
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16{{$}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the f16 name, the IR bitcasts the <2 x half> argument to i32 and
; performs an integer add of -1. Both targets are expected to select a scalar
; s_add_i32 with the constant -1, copy the result to a VGPR and store it.
; NOTE: the gfx900 check names register s4 directly instead of capturing the
; loaded register the way the fiji checks do.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, -1
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the f16 name, the IR bitcasts the <2 x half> argument to i32 and
; performs an integer add of 0xfffefffe. Both targets are expected to select a
; scalar s_add_i32 with that literal, copy the result to a VGPR and store it.
; NOTE: the gfx900 check names register s4 directly instead of capturing the
; loaded register the way the fiji checks do.
; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfffefffe
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4294901758 ; 0xfffefffe
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; Despite the f16 name, the IR bitcasts the <2 x half> argument to i32 and
; performs an integer add of 0xfff0fff0. Both targets are expected to select a
; scalar s_add_i32 with that literal, copy the result to a VGPR and store it.
; NOTE: the gfx900 check names register s4 directly instead of capturing the
; loaded register the way the fiji checks do.
; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfff0fff0
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4293984240 ; 0xfff0fff0
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0x003F, which the checks expect to be printed as the integer constant 63.
; gfx900 is expected to use one v_pk_add_f16; fiji is expected to add the
; halves separately (SDWA add with the constant in a VGPR plus v_add_f16_e64)
; and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 63
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a <2 x half> kernel argument with a splat of the half bit pattern
; 0x0040, which the checks expect to be printed as the integer constant 64.
; gfx900 is expected to use one v_pk_add_f16; fiji is expected to add the
; halves separately (SDWA add with the constant in a VGPR plus v_add_f16_e64)
; and merge them with v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 64
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by every kernel definition in this file.
attributes #0 = { nounwind }