; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; FIXME: Merge into imm.ll

; A <2 x i16> splat of -32768 (0x8000 in each lane) is expected to be
; materialized as the 32-bit literal 0x80008000 in one v_mov_b32 and written
; with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
  store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 0.0 is expected to be materialized as the plain
; operand 0 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of -0.0 is expected to be materialized as the 32-bit
; literal 0x80008000 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 0.5 is expected to be materialized as the packed
; pattern 0x38003800 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of -0.5 is expected to be materialized as the packed
; pattern 0xb800b800 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 1.0 is expected to be materialized as the packed
; pattern 0x3c003c00 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of -1.0 is expected to be materialized as the packed
; pattern 0xbc00bc00 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 2.0 is expected to be materialized as the packed
; pattern 0x40004000 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of -2.0 is expected to be materialized as the packed
; pattern 0xc000c000 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 4.0 is expected to be materialized as the packed
; pattern 0x44004400 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of -4.0 is expected to be materialized as the packed
; pattern 0xc400c400 in one v_mov_b32 and written with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of the raw half pattern 0xH3118 is expected to be
; materialized as the packed pattern 0x31183118 in one v_mov_b32 and written
; with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of the raw half pattern 0xHB118 is expected to be
; materialized as the packed pattern 0xb118b118 in one v_mov_b32 and written
; with a single dword store.
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
  ret void
}

; A <2 x half> splat of 4096.0 is expected to be materialized as the 32-bit
; literal 0x6c006c00 in one v_mov_b32 and written with a single dword store.
; The literal is anchored with {{$}} like the other store checks so that a
; longer constant starting with the same digits cannot match.
; GCN-LABEL: {{^}}store_literal_imm_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out) #0 {
  store <2 x half> <half 4096.0, half 4096.0>, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a 0.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes 0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 0, the other uses v_add_f16_sdwa (result in WORD_1) with
; the constant first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.0, half 0.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a 0.5 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes 0.5 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 0.5, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0x3800 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a -0.5 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes -0.5 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -0.5, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xb800 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -0.5, half -0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a 1.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes 1.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 1.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0x3c00 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 1.0, half 1.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a -1.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes -1.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -1.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xbc00 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -1.0, half -1.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a 2.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes 2.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 2.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0x4000 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 2.0, half 2.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a -2.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes -2.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -2.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xc000 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -2.0, half -2.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a 4.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes 4.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 4.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0x4400 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 4.0, half 4.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a -4.0 splat.
; gfx901 run: the argument is loaded with s_load_dword and one v_pk_add_f16
; takes -4.0 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -4.0, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xc400 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -4.0, half -4.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; Same 0.5-splat fadd, but the vector operand comes from a global load
; instead of a kernel argument.
; gfx901 run: the loaded dword feeds one v_pk_add_f16 with 0.5 as the last
; operand.
; fiji run: a single dword load, with no output containing "and" before the
; constant is materialized; the high lane is added by v_add_f16_sdwa reading
; WORD_1 of the source with 0x3800 held in a VGPR, the low lane by
; v_add_f16_e32 with the operand 0.5, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_dword
; VI-NOT: and
; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %x = load <2 x half>, <2 x half> addrspace(1)* %in
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a loaded <2 x half> with a 1024.0 splat, a value the checks expect
; to be moved into a VGPR as 0x6400 rather than used as a direct operand.
; gfx901 run: the constant register feeds v_pk_add_f16 with op_sel_hi:[1,0].
; fiji run: the same constant register is shared by v_add_f16_e32 and
; v_add_f16_sdwa (reading WORD_1 of the source, writing WORD_1), no output
; containing "and" may appear between the load and the adds, and the halves
; are merged by v_or_b32_e32.
; GCN-LABEL: {{^}}commute_add_literal_v2f16:
; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %x = load <2 x half>, <2 x half> addrspace(1)* %in
  %y = fadd <2 x half> %x, <half 1024.0, half 1024.0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0x0001; the checks expect it to print as the integer operand 1.
; gfx901 run: one v_pk_add_f16 takes 1 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 1, the other uses v_add_f16_sdwa (result in WORD_1) with
; 1 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0x0002; the checks expect it to print as the integer operand 2.
; gfx901 run: one v_pk_add_f16 takes 2 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 2, the other uses v_add_f16_sdwa (result in WORD_1) with
; 2 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0x0010; the checks expect it to print as the integer operand 16.
; gfx901 run: one v_pk_add_f16 takes 16 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 16, the other uses v_add_f16_sdwa (result in WORD_1) with
; 16 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0xFFFF; the checks expect it to print as the integer operand -1.
; gfx901 run: one v_pk_add_f16 takes -1 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -1, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xffff first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xHFFFF, half 0xHFFFF>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0xFFFE; the checks expect it to print as the integer operand -2.
; gfx901 run: one v_pk_add_f16 takes -2 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -2, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xfffe first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xHFFFE, half 0xHFFFE>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0xFFF0; the checks expect it to print as the integer operand -16.
; gfx901 run: one v_pk_add_f16 takes -16 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand -16, the other uses v_add_f16_sdwa (result in WORD_1) with
; 0xfff0 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -16{{$}}
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xHFFF0, half 0xHFFF0>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0x003F; the checks expect it to print as the integer operand 63.
; gfx901 run: one v_pk_add_f16 takes 63 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 63, the other uses v_add_f16_sdwa (result in WORD_1) with
; 63 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; fadd of a kernel-argument <2 x half> with a splat whose lanes have the raw
; bit pattern 0x0040; the checks expect it to print as the integer operand 64.
; gfx901 run: one v_pk_add_f16 takes 64 directly as its last operand.
; fiji run: the lanes are loaded as two ushorts; one lane uses v_add_f16_e32
; with the operand 64, the other uses v_add_f16_sdwa (result in WORD_1) with
; 64 first moved into a VGPR, and the halves are merged by v_or_b32.
; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
; GFX9: buffer_store_dword [[REG]]

; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
  store <2 x half> %y, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }