; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

; FIXME: Merge into imm.ll

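; Test materialization and use of 16-bit immediates. Stores of f16/i16
; constants should be emitted as a single v_mov_b32 of the bit pattern
; (negative values appear sign-extended to 32 bits in the VI output), and
; the VI checks below expect supported f16 immediates to be used directly
; as operands of the f16 add instructions.
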
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
  store volatile i16 -32768, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
  store half 0.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_imm_neg_0.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
  store half -0.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.5_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
  store half 0.5, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
  store half -0.5, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_1.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
  store half 1.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
  store half -1.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_2.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
  store half 2.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
  store half -2.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_4.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
  store half 4.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
  store half -4.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xH3118, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xHB118, half addrspace(1)* %out
  ret void
}

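; 4096.0 (0x6c00) is not one of the f16 inline constants, so it is covered
; separately as a literal value.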
; GCN-LABEL: {{^}}store_literal_imm_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
  store half 4096.0, half addrspace(1)* %out
  ret void
}

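; The fadd tests below check that supported f16 immediates are encoded
; directly as source operands of v_add_f16 on VI (inline constants where
; possible, a literal otherwise) instead of being materialized with a
; separate v_mov_b32.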
; GCN-LABEL: {{^}}add_inline_imm_0.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 2.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -2.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 4.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -4.0
  store half %y, half addrspace(1)* %out
  ret void
}

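; In the commuted cases the variable operand is loaded into a VGPR, so the
; immediate is expected to be commuted into the first source of the VOP2
; form (v_add_f16_e32).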
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}commute_add_literal_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 1024.0
  store half %y, half addrspace(1)* %out
  ret void
}

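; The adds below use f16 values whose bit patterns are small integers
; (1, 2, 16, and later 63 and 64); VI is expected to encode them as integer
; inline constants of v_add_f16 rather than literals.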
; GCN-LABEL: {{^}}add_inline_imm_1_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0001
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0002
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_16_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0010
  store half %y, half addrspace(1)* %out
  ret void
}

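; The negative cases add a negative constant to a loaded i16 and bitcast
; the result to half; the constant is expected to fold into v_add_u16 as an
; inline operand.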
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -1, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -1
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -2, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -2
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -16, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -16
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_63_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 63
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH003F
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_64_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 64
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0040
  store half %y, half addrspace(1)* %out
  ret void
}