; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Select between two loaded f16 values based on an olt compare of two other
; loaded f16 values. The SI check lines show the compare/select done in f32
; after cvt_f32_f16 promotion; the VI lines show native f16 compare/select.
; GCN-LABEL: {{^}}select_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI-DAG: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @select_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  ; Volatile loads keep the four scalar loads from being combined/reordered.
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %d.val = load volatile half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Same as select_f16 but with an f16 immediate (0xH3800 = 0.5) as the first
; compare operand; the constant folds into the compare as the inline 0.5.
; GCN-LABEL: {{^}}select_f16_imm_a:
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI-DAG: v_cmp_lt_f32_e32 vcc, 0.5, v[[B_F32]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @select_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %d.val = load volatile half, half addrspace(1)* %d
  %fcmp = fcmp olt half 0xH3800, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the second compare operand; the compare is commuted to
; put the constant first, so lt becomes gt in the checked output.
; GCN-LABEL: {{^}}select_f16_imm_b:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI-DAG: v_cmp_gt_f32_e32 vcc, 0.5, v[[A_F32]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]

; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @select_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %c.val = load volatile half, half addrspace(1)* %c
  %d.val = load volatile half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, 0xH3800
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the true-value of the select. The select operands are
; swapped and the compare inverted (nlt) so the constant can be the inline
; first cndmask source on SI; VI materializes 0x3800 in a VGPR instead.
; GCN-LABEL: {{^}}select_f16_imm_c:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], 0.5, v[[D_F32]], vcc
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]

; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[C_F16]], v[[D_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @select_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %d) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %d.val = load volatile half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half 0xH3800, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the false-value of the select; it already sits in the
; first cndmask source slot on SI, so no compare inversion is needed.
; GCN-LABEL: {{^}}select_f16_imm_d:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], 0.5, v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @select_f16_imm_d(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half 0xH3800
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Vector (<2 x half>) version: the select is scalarized, so each target
; emits two compare + cndmask pairs.
; GCN-LABEL: {{^}}select_v2f16:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cndmask_b32_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32

; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32_e32

; GCN: s_endpgm
define amdgpu_kernel void @select_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant vector <0.5, 0.5625> as the first compare
; operand. Only the 0.5 element is an inline constant, hence the mixed
; gt/lt commutation across the two scalarized compares.
; GCN-LABEL: {{^}}select_v2f16_imm_a:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_gt_f32_e32
; SI: v_cndmask_b32_e32
; SI: v_cmp_lt_f32_e32 vcc, 0.5
; SI: v_cndmask_b32_e32

; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32_e32
; VI: v_cmp_gt_f16_e32
; VI: v_cndmask_b32_e32

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @select_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with the constant vector as the second compare operand;
; mirror image of select_v2f16_imm_a (gt/lt roles swapped).
; GCN-LABEL: {{^}}select_v2f16_imm_b:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_lt_f32_e32
; SI: v_cndmask_b32_e32
; SI: v_cmp_gt_f32_e32 vcc, 0.5
; SI: v_cndmask_b32_e32

; VI: v_cmp_gt_f16_e32
; VI: v_cndmask_b32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32_e32

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @select_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, <half 0xH3800, half 0xH3900>
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant vector as the true-value: operands swapped
; and compares inverted (nlt), matching the scalar select_f16_imm_c case.
; GCN-LABEL: {{^}}select_v2f16_imm_c:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_nlt_f32_e32
; SI: v_cndmask_b32_e32
; SI: v_cmp_nlt_f32_e32
; SI-DAG: v_cndmask_b32_e32

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; SI-DAG: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @select_v2f16_imm_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant vector as the false-value; no compare
; inversion needed, matching the scalar select_f16_imm_d case.
; GCN-LABEL: {{^}}select_v2f16_imm_d:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_lt_f32_e32
; SI: v_cndmask_b32
; SI: v_cmp_lt_f32_e32
; SI: v_cndmask_b32

; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32
; VI: v_cmp_lt_f16_e32
; VI: v_cndmask_b32

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @select_v2f16_imm_d(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}