; blob: a82b310726a17734d717aaa581e8d8f734c68c8d
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

; Intrinsic declarations used by the tests in this file: scalar half and
; <2 x half> forms of fabs and canonicalize, all in attribute group #0.
declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0

; Canonicalize a half loaded from %out and store it back. The checks expect a
; multiply by 1.0 (e32 encoding, v-register source) followed by a 16-bit store
; of the multiply result.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a half that arrives as a zero-extended i16 kernel argument and
; is bitcast to half. The checks expect the multiply by 1.0 in the e64
; encoding with an s-register source.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
  %val = bitcast i16 %val.arg to half
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize fabs(x) for a loaded half. The checks expect the fabs to appear
; as the |...| source modifier on the multiply by 1.0 rather than as a
; separate instruction.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize -fabs(x) for a loaded half; the negation is written as
; `fsub -0.0, x`. The checks expect both operations to appear as the -|...|
; source modifier on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %val.fabs.fneg = fsub half -0.0, %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize -x for a loaded half; the negation is written as
; `fsub -0.0, x`. The checks expect it to appear as a negated source operand
; on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fneg = fsub half -0.0, %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(+0.0) is expected to become a move of the
; immediate 0 followed by a 16-bit store.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-0.0). The half bit pattern of -0.0 is 0x8000;
; the expected immediate is that value printed sign-extended to 32 bits.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(1.0) is expected to become a move of 0x3c00,
; the half bit pattern of 1.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-1.0). The half bit pattern of -1.0 is 0xbc00;
; the expected immediate is that value printed sign-extended to 32 bits.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(16.0) is expected to become a move of 0x4c00,
; the half bit pattern of 16.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of a positive subnormal half (0xH03FF: zero exponent, all
; mantissa bits set) under attribute group #1, which sets no denormal
; feature. The check expects the bit pattern to be preserved.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of a positive subnormal half (0xH03FF) under attribute group
; #3, which enables +fp64-fp16-denormals. The check expects the bit pattern
; to be preserved.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of a negative subnormal half (0xH83FF) under attribute group
; #1, which sets no denormal feature. The check expects the bit pattern to be
; preserved, printed sign-extended to 32 bits.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of a negative subnormal half (0xH83FF) under attribute group
; #3, which enables +fp64-fp16-denormals. The check expects the bit pattern
; to be preserved, printed sign-extended to 32 bits.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of 0xH7C00; the check expects the value to be unchanged.
; NOTE(review): despite the "qnan" in the test name, 0xH7C00 (all exponent
; bits set, zero mantissa) is the IEEE half encoding of +infinity, not a NaN.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of the half whose bits are i16 -1 (0xFFFF: sign set, all
; exponent and mantissa bits set, i.e. a negative quiet NaN). The check
; expects the fold to produce 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of the half whose bits are i16 -2 (0xFFFE, a negative quiet
; NaN with a different payload). The check expects the fold to produce
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of 0xH7C01: a positive NaN with the top mantissa bit clear
; (signaling) and the smallest payload. The check expects the fold to produce
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of 0xH7DFF: a positive NaN with the top mantissa bit clear
; (signaling) and all lower mantissa bits set. The check expects the fold to
; produce 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of 0xHFDFF: the sign-set counterpart of 0xH7DFF (signaling
; NaN, all lower mantissa bits set). The check expects the fold to produce
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Constant fold of 0xHFC01: the sign-set counterpart of 0xH7C01 (signaling
; NaN, smallest payload). The check expects the fold to produce 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a <2 x half> loaded from %out and store it back. The tonga run
; expects two scalar multiplies by 1.0 and a left shift by 16; the gfx901 run
; expects one packed multiply whose result is stored as a dword.
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, {{v[0-9]+}}
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, {{v[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %out
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize fabs(x) for a loaded <2 x half>. Both runs currently expect
; the fabs as an explicit AND with 0x7fff7fff ahead of the multiply, instead
; of a source modifier (see the FIXME).
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI: v_bfe_u32
; VI: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}}
; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %out
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize -fabs(x) for a loaded <2 x half>; the negation is written as
; fsub from a <-0.0, -0.0> vector. The tonga run expects an OR with
; 0x80008000 before two scalar multiplies; the gfx901 run expects an AND with
; 0x7fff7fff and then a packed multiply carrying neg_lo/neg_hi on the second
; source.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %out
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize -x for a loaded <2 x half>; the negation is written as fsub
; from a <-0.0, -0.0> vector. The tonga run expects an explicit XOR with
; 0x80008000, a right shift by 16 to get the high half, and two scalar
; multiplies; the gfx901 run expects one packed multiply carrying
; neg_lo/neg_hi on the second source.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
; VI: v_lshrrev_b32_e32 [[FNEG_HI:v[0-9]+]], 16, [[FNEG]]
; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, [[FNEG]]
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, [[FNEG_HI]]
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %out
  %fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize a <2 x half> that arrives as a zero-extended i32 kernel
; argument and is bitcast to the vector type. The tonga run expects two e64
; multiplies with s-register sources and a left shift by 16; the gfx901 run
; expects one packed multiply with an s-register source.
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}}
; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
  %val = bitcast i32 %val.arg to <2 x half>
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize of an all-zero <2 x half> is expected to become
; a move of the immediate 0 followed by a dword store.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize of <-0.0, -0.0>. The expected immediate packs
; the half pattern 0x8000 into both 16-bit lanes.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize of <1.0, 1.0>. The expected immediate packs the
; half pattern 0x3c00 into both 16-bit lanes.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize of <-1.0, -1.0>. The expected immediate packs
; the half pattern 0xbc00 into both 16-bit lanes.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize of <16.0, 16.0>. The expected immediate packs
; the half pattern 0x4c00 into both 16-bit lanes.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with the positive subnormal 0xH03FF in both
; lanes. The check expects both lanes to be preserved.
; NOTE(review): the name says "no_denormals", but the function uses attribute
; group #1 (plain nounwind) rather than #2 (-fp64-fp16-denormals) - confirm
; which attribute group was intended.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with the positive subnormal 0xH03FF in both
; lanes, under attribute group #3 (+fp64-fp16-denormals). The check expects
; both lanes to be preserved.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with the negative subnormal 0xH83FF in both
; lanes. The check expects both lanes to be preserved.
; NOTE(review): the name says "no_denormals", but the function uses attribute
; group #1 (plain nounwind) rather than #2 (-fp64-fp16-denormals) - confirm
; which attribute group was intended.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with the negative subnormal 0xH83FF in both
; lanes, under attribute group #3 (+fp64-fp16-denormals). The check expects
; both lanes to be preserved.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with 0xH7C00 in both lanes; the check expects
; both lanes to be unchanged.
; NOTE(review): despite the "qnan" in the test name, 0xH7C00 (all exponent
; bits set, zero mantissa) is the IEEE half encoding of +infinity, not a NaN.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of the <2 x half> whose bits are i32 -1, i.e. 0xFFFF (a
; negative quiet NaN) in both lanes. The check expects each lane to fold to
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> whose lanes are each the bitcast of i16 -2
; (0xFFFE, a negative quiet NaN). The check expects each lane to fold to
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with 0xH7C01 (positive signaling NaN,
; smallest payload) in both lanes. The check expects each lane to fold to
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with 0xH7DFF (positive signaling NaN, all
; lower mantissa bits set) in both lanes. The check expects each lane to fold
; to 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with 0xHFDFF (negative signaling NaN, all
; lower mantissa bits set) in both lanes. The check expects each lane to fold
; to 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Constant fold of a <2 x half> with 0xHFC01 (negative signaling NaN,
; smallest payload) in both lanes. The check expects each lane to fold to
; 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute groups referenced by the declarations and kernels in this file:
;   #0 - intrinsic declarations
;   #1 - kernels with no explicit denormal feature
;   #2 - f64/f16 denormal feature disabled (not referenced by any function
;        visible in this file)
;   #3 - f64/f16 denormal feature enabled
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }