; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Codegen checks for the llvm.copysign intrinsic on half-precision operands,
; including cases where the magnitude or sign operand is extended from or
; truncated to half.
;
; Check prefixes, as selected by the three llc invocations above. The GCN
; prefix is shared by all three invocations, the SI prefix is used by the
; invocation without -mcpu, and the VI prefix is used by the tonga and gfx901
; invocations.

; Intrinsic overloads exercised by the functions in this file.
declare half @llvm.copysign.f16(half, half)
declare float @llvm.copysign.f32(float, float)
declare double @llvm.copysign.f64(double, double)
declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>)
declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)

; copysign on half where both the magnitude and the sign are loaded as half.
; Under the SI prefix both inputs are expected to be converted to f32, combined
; by v_bfi_b32 with the constant built by s_brev_b32 -2, and converted back to
; f16. Under the VI prefix a single v_bfi_b32 on the loaded values with the
; 0x7fff constant is expected.
; GCN-LABEL: {{^}}test_copysign_f16:
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
; SI: buffer_load_ushort v[[SIGN:[0-9]+]]
; SI: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; VI: buffer_load_ushort v[[SIGN:[0-9]+]]
; VI: buffer_load_ushort v[[MAG:[0-9]+]]
; VI: s_movk_i32 s[[CONST:[0-9]+]], 0x7fff
; VI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN]]
; GCN: buffer_store_short v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_f16(
  half addrspace(1)* %arg_out,
  half addrspace(1)* %arg_mag,
  half addrspace(1)* %arg_sign) {
entry:
  %mag = load half, half addrspace(1)* %arg_mag
  %sign = load half, half addrspace(1)* %arg_sign
  %out = call half @llvm.copysign.f16(half %mag, half %sign)
  store half %out, half addrspace(1)* %arg_out
  ret void
}

; f32 copysign whose magnitude is a half extended to float and whose sign is a
; float. All targets are expected to convert the magnitude to f32 and feed it,
; together with the loaded sign dword, to one v_bfi_b32.
; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f16_sign_f32:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dword v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
; GCN: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG_EXT]], v[[SIGN]]
; GCN: buffer_store_dword v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_out_f32_mag_f16_sign_f32(
  float addrspace(1)* %arg_out,
  half addrspace(1)* %arg_mag,
  float addrspace(1)* %arg_sign) {
entry:
  %mag = load half, half addrspace(1)* %arg_mag
  %mag.ext = fpext half %mag to float
  %sign = load float, float addrspace(1)* %arg_sign
  %out = call float @llvm.copysign.f32(float %mag.ext, float %sign)
  store float %out, float addrspace(1)* %arg_out
  ret void
}

; f64 copysign whose magnitude is a half extended to double and whose sign is a
; double. The checks expect the magnitude to be widened via f32 to f64, and the
; v_bfi_b32 to operate only on the high dwords; the stored pair reuses the low
; dword of the extended magnitude unchanged.
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f16_sign_f64:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]{{\]}}, v[[MAG_EXT]]
; GCN: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_EXT_HI]], v[[SIGN_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_EXT_LO]]:[[OUT_HI]]{{\]}}
; GCN: s_endpgm
define void @test_copysign_out_f64_mag_f16_sign_f64(
  double addrspace(1)* %arg_out,
  half addrspace(1)* %arg_mag,
  double addrspace(1)* %arg_sign) {
entry:
  %mag = load half, half addrspace(1)* %arg_mag
  %mag.ext = fpext half %mag to double
  %sign = load double, double addrspace(1)* %arg_sign
  %out = call double @llvm.copysign.f64(double %mag.ext, double %sign)
  store double %out, double addrspace(1)* %arg_out
  ret void
}

; f32 copysign whose magnitude is a float and whose sign is a half extended to
; float. Under the SI prefix the sign is expected to be converted to f32 before
; the v_bfi_b32. Under the VI prefix the loaded half is instead shifted left by
; 16 (moving bit 15 to bit 31) and used directly as the sign operand.
; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f32_sign_f16:
; GCN-DAG: buffer_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_F32]]
; VI-DAG: v_lshlrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN]]
; VI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_SHIFT]]
; GCN: buffer_store_dword v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_out_f32_mag_f32_sign_f16(
  float addrspace(1)* %arg_out,
  float addrspace(1)* %arg_mag,
  half addrspace(1)* %arg_sign) {
entry:
  %mag = load float, float addrspace(1)* %arg_mag
  %sign = load half, half addrspace(1)* %arg_sign
  %sign.ext = fpext half %sign to float
  %out = call float @llvm.copysign.f32(float %mag, float %sign.ext)
  store float %out, float addrspace(1)* %arg_out
  ret void
}

; f64 copysign whose magnitude is a double and whose sign is a half extended to
; double. The v_bfi_b32 is expected to touch only the high dword of the
; magnitude. Under the SI prefix the sign is first converted to f32; under the
; VI prefix the loaded half is shifted left by 16 instead. The low magnitude
; dword is stored unchanged.
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f64_sign_f16:
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_F32]]
; VI-DAG: v_lshlrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN]]
; VI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_SHIFT]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_LO]]:[[OUT_HI]]{{\]}}
; GCN: s_endpgm
define void @test_copysign_out_f64_mag_f64_sign_f16(
  double addrspace(1)* %arg_out,
  double addrspace(1)* %arg_mag,
  half addrspace(1)* %arg_sign) {
entry:
  %mag = load double, double addrspace(1)* %arg_mag
  %sign = load half, half addrspace(1)* %arg_sign
  %sign.ext = fpext half %sign to double
  %out = call double @llvm.copysign.f64(double %mag, double %sign.ext)
  store double %out, double addrspace(1)* %arg_out
  ret void
}

; f16 copysign whose magnitude is a half and whose sign is a float truncated to
; half. Under the SI prefix the magnitude is expected to be extended to f32,
; combined with the loaded sign dword by v_bfi_b32, and converted back to f16.
; Under the VI prefix the sign dword is shifted right by 16 (moving bit 31 to
; bit 15) and combined with the half magnitude using the 0x7fff constant.
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f32:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dword v[[SIGN:[0-9]+]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; VI-DAG: s_movk_i32 s[[CONST:[0-9]+]], 0x7fff
; VI-DAG: v_lshrrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN]]
; VI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_SHIFT]]
; GCN: buffer_store_short v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_out_f16_mag_f16_sign_f32(
  half addrspace(1)* %arg_out,
  half addrspace(1)* %arg_mag,
  float addrspace(1)* %arg_sign) {
entry:
  %mag = load half, half addrspace(1)* %arg_mag
  %sign = load float, float addrspace(1)* %arg_sign
  %sign.trunc = fptrunc float %sign to half
  %out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
  store half %out, half addrspace(1)* %arg_out
  ret void
}

; f16 copysign whose magnitude is a half and whose sign is a double truncated
; to half. Only the high dword of the loaded sign is expected to be used. Under
; the SI prefix it feeds the f32 v_bfi_b32 directly and the result is converted
; back to f16. Under the VI prefix it is shifted right by 16 and combined with
; the half magnitude using the 0x7fff constant.
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f64:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_HI]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; VI-DAG: s_movk_i32 s[[CONST:[0-9]+]], 0x7fff
; VI-DAG: v_lshrrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN_HI]]
; VI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_SHIFT]]
; GCN: buffer_store_short v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_out_f16_mag_f16_sign_f64(
  half addrspace(1)* %arg_out,
  half addrspace(1)* %arg_mag,
  double addrspace(1)* %arg_sign) {
entry:
  %mag = load half, half addrspace(1)* %arg_mag
  %sign = load double, double addrspace(1)* %arg_sign
  %sign.trunc = fptrunc double %sign to half
  %out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
  store half %out, half addrspace(1)* %arg_out
  ret void
}

; f16 copysign whose magnitude is a float truncated to half and whose sign is a
; half. Under the SI prefix the magnitude is expected to be truncated to f16
; and re-extended to f32, the sign extended to f32, the two combined by
; v_bfi_b32, and the result converted back to f16. Under the VI prefix the
; truncated magnitude and the loaded sign are combined directly using the
; 0x7fff constant.
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f32_sign_f16:
; GCN-DAG: buffer_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f16_f32_e32 v[[MAG_TRUNC:[0-9]+]], v[[MAG]]
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG_TRUNC]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; VI-DAG: s_movk_i32 s[[CONST:[0-9]+]], 0x7fff
; VI-DAG: v_cvt_f16_f32_e32 v[[MAG_TRUNC:[0-9]+]], v[[MAG]]
; VI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG_TRUNC]], v[[SIGN]]
; GCN: buffer_store_short v[[OUT]]
; GCN: s_endpgm
define void @test_copysign_out_f16_mag_f32_sign_f16(
  half addrspace(1)* %arg_out,
  float addrspace(1)* %arg_mag,
  half addrspace(1)* %arg_sign) {
entry:
  %mag = load float, float addrspace(1)* %arg_mag
  %mag.trunc = fptrunc float %mag to half
  %sign = load half, half addrspace(1)* %arg_sign
  %out = call half @llvm.copysign.f16(half %mag.trunc, half %sign)
  store half %out, half addrspace(1)* %arg_out
  ret void
}

; f16 copysign whose magnitude is a double truncated to half and whose sign is
; a half. Only the presence of a v_bfi_b32 before the end of the program is
; checked; the truncation sequence itself is not matched.
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f64_sign_f16:
; GCN: v_bfi_b32
; GCN: s_endpgm
define void @test_copysign_out_f16_mag_f64_sign_f16(
  half addrspace(1)* %arg_out,
  double addrspace(1)* %arg_mag,
  half addrspace(1)* %arg_sign) {
entry:
  %mag = load double, double addrspace(1)* %arg_mag
  %mag.trunc = fptrunc double %mag to half
  %sign = load half, half addrspace(1)* %arg_sign
  %out = call half @llvm.copysign.f16(half %mag.trunc, half %sign)
  store half %out, half addrspace(1)* %arg_out
  ret void
}

; <2 x half> copysign with both operands passed by value. The checks only
; require two v_bfi_b32 instructions, matched in order, before s_endpgm.
; GCN-LABEL: {{^}}test_copysign_v2f16:
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: s_endpgm
define void @test_copysign_v2f16(
  <2 x half> addrspace(1)* %arg_out,
  <2 x half> %arg_mag,
  <2 x half> %arg_sign) {
entry:
  %out = call <2 x half> @llvm.copysign.v2f16(<2 x half> %arg_mag, <2 x half> %arg_sign)
  store <2 x half> %out, <2 x half> addrspace(1)* %arg_out
  ret void
}

; <3 x half> copysign with both operands passed by value. The checks only
; require three v_bfi_b32 instructions, matched in order, before s_endpgm.
; GCN-LABEL: {{^}}test_copysign_v3f16:
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: s_endpgm
define void @test_copysign_v3f16(
  <3 x half> addrspace(1)* %arg_out,
  <3 x half> %arg_mag,
  <3 x half> %arg_sign) {
entry:
  %out = call <3 x half> @llvm.copysign.v3f16(<3 x half> %arg_mag, <3 x half> %arg_sign)
  store <3 x half> %out, <3 x half> addrspace(1)* %arg_out
  ret void
}

; <4 x half> copysign with both operands passed by value. The checks only
; require four v_bfi_b32 instructions, matched in order, before s_endpgm.
; GCN-LABEL: {{^}}test_copysign_v4f16:
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: v_bfi_b32
; GCN: s_endpgm
define void @test_copysign_v4f16(
  <4 x half> addrspace(1)* %arg_out,
  <4 x half> %arg_mag,
  <4 x half> %arg_sign) {
entry:
  %out = call <4 x half> @llvm.copysign.v4f16(<4 x half> %arg_mag, <4 x half> %arg_sign)
  store <4 x half> %out, <4 x half> addrspace(1)* %arg_out
  ret void
}