; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s

; Scalar f32 exp. All three run lines expect the same sequence: multiply the
; argument by the literal 0x3fb8aa3b (the f32 bit pattern of log2(e),
; ~1.442695) and feed the product to v_exp_f32.
define float @v_exp_f32(float %arg0) {
; SI-LABEL: v_exp_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT: v_exp_f32_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; VI-NEXT: v_exp_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.exp.f32(float %arg0)
  ret float %result
}

; <2 x float> exp. One shared expectation covers every run line: the
; 0x3fb8aa3b multiplier is materialized once into an SGPR (captured as SREG)
; and reused by one v_mul_f32 per lane before the per-lane v_exp_f32.
; The multiply operands are matched with register patterns rather than fixed
; register numbers.
define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
; GCN-LABEL: v_exp_v2f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: v_exp_f32_e32 v1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
  ret <2 x float> %result
}

; <3 x float> exp. Same shape as the two-lane case, with three multiplies by
; the SGPR holding 0x3fb8aa3b followed by three v_exp_f32, shared across all
; run lines.
define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
; GCN-LABEL: v_exp_v3f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: v_exp_f32_e32 v1, v1
; GCN-NEXT: v_exp_f32_e32 v2, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
  ret <3 x float> %result
}

; <4 x float> exp: four multiplies by the SGPR holding 0x3fb8aa3b, then four
; v_exp_f32, with fixed result registers v0..v3.
; NOTE(review): only the default-CPU run line has expectations for this
; function; the fiji and gfx900 runs are not verified here. Consider adding
; expectations for them (or a shared one, as the 2- and 3-lane tests do)
; after confirming the generated code.
define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
; SI-LABEL: v_exp_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
; SI-NEXT: v_exp_f32_e32 v0, v0
; SI-NEXT: v_exp_f32_e32 v1, v1
; SI-NEXT: v_exp_f32_e32 v2, v2
; SI-NEXT: v_exp_f32_e32 v3, v3
; SI-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
  ret <4 x float> %result
}

; Scalar f16 exp.
; - Default-CPU run: the value is round-tripped through v_cvt_f16_f32 /
;   v_cvt_f32_f16 and the computation is done with the f32 multiply and
;   v_exp_f32.
; - fiji and gfx900 runs: native f16 path, multiplying by 0x3dc5 (log2(e) as
;   an f16 bit pattern) and then v_exp_f16.
define half @v_exp_f16(half %arg0) {
; SI-LABEL: v_exp_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT: v_exp_f32_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT: v_exp_f16_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
; GFX9-NEXT: v_exp_f16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %result = call half @llvm.exp.f16(half %arg0)
  ret half %result
}

; <2 x half> exp.
; - Default-CPU run: both lanes are converted to f32, multiplied by the SGPR
;   holding 0x3fb8aa3b, and passed to v_exp_f32.
; - fiji run: 0x3dc5 is loaded into an SGPR and copied to a VGPR; the high
;   half is handled with SDWA forms of v_mul_f16 / v_exp_f16, the low half
;   with the plain forms, and the halves are merged with v_or_b32.
; - gfx900 run: a single packed v_pk_mul_f16, then per-half v_exp_f16 and a
;   v_and_b32 / v_lshl_or_b32 repack.
define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
; SI-LABEL: v_exp_v2f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; SI-NEXT: v_exp_f32_e32 v0, v0
; SI-NEXT: v_exp_f32_e32 v1, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
; VI-NEXT: v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}}
; VI-NEXT: v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT: v_exp_f16_e32 [[MUL2]], [[MUL2]]
; VI-NEXT: v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; GFX9-NEXT: v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0]
; GFX9-NEXT: v_exp_f16_e32 v1, v0
; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
  ret <2 x half> %result
}

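; Disabled: the <3 x half> variant is commented out. The reason is not
; recorded in this file -- TODO confirm whether it can be enabled.
; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
;   ret <3 x half> %result
; }
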
; <4 x half> exp.
; - Default-CPU run: all four lanes are converted to f32, multiplied by the
;   SGPR holding 0x3fb8aa3b, and passed to v_exp_f32.
; - fiji run: low halves use plain v_mul_f16 / v_exp_f16, high halves use the
;   SDWA forms (with the constant copied into a VGPR), and each register pair
;   is merged with v_or_b32.
; - gfx900 run: SDWA multiplies take the SGPR constant directly; the four
;   results go through v_exp_f16 and are repacked with a 0xffff mask and
;   v_lshl_or_b32.
; NOTE(review): in the fiji expectations MUL3 and MUL4 are captured but never
; referenced again; the SDWA v_exp_f16 lines name v1 and v0 directly.
define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
; SI-LABEL: v_exp_v4f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
; SI-NEXT: v_exp_f32_e32 v0, v0
; SI-NEXT: v_exp_f32_e32 v1, v1
; SI-NEXT: v_exp_f32_e32 v2, v2
; SI-NEXT: v_exp_f32_e32 v3, v3
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
; VI-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
; VI-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
; VI-NEXT: v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
; VI-NEXT: v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT: v_or_b32_e32 v1, [[EXP1]], [[EXP2]]
; VI-NEXT: v_or_b32_e32 v0, [[EXP3]], [[EXP4]]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; GFX9-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
; GFX9-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
; GFX9-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
; GFX9-NEXT: v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL2]]
; GFX9-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL4]]
; GFX9-NEXT: v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL3]]
; GFX9-NEXT: v_mov_b32_e32 [[VCONST:v[0-9]+]], 0xffff
; GFX9-NEXT: v_and_b32_e32 [[AND1:v[0-9]+]], [[VCONST]], [[EXP2]]
; GFX9-NEXT: v_and_b32_e32 [[AND2:v[0-9]+]], [[VCONST]], [[EXP1]]
; GFX9-NEXT: v_lshl_or_b32 v0, [[EXP3]], 16, [[AND1]]
; GFX9-NEXT: v_lshl_or_b32 v1, [[EXP4]], 16, [[AND2]]
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
  ret <4 x half> %result
}

; Declarations of the llvm.exp intrinsic overloads called by the tests above:
; f32 scalar and 2/3/4-element vectors first, then the f16 equivalents.
declare float @llvm.exp.f32(float)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <3 x float> @llvm.exp.v3f32(<3 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)

; The <3 x half> overload stays declared even though its test function is
; commented out in this file.
declare half @llvm.exp.f16(half)
declare <2 x half> @llvm.exp.v2f16(<2 x half>)
declare <3 x half> @llvm.exp.v3f16(<3 x half>)
declare <4 x half> @llvm.exp.v4f16(<4 x half>)