blob: 012d19d3751a30a12422d5fb58754fd89ed50a11 [file] [log] [blame]
1; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
5
6
7; GCN-LABEL: {{^}}s_pack_v2f16:
8; GFX9: s_load_dword [[VAL0:s[0-9]+]]
9; GFX9: s_load_dword [[VAL1:s[0-9]+]]
10; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
11; GFX9: ; use [[PACKED]]
; Packs two loaded values into a <2 x half>: the low 16 bits of %val0 become
; element 0 and the low 16 bits of %val1 become element 1. The vector is then
; reinterpreted as an i32 and consumed by inline asm.
12define void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
  ; Both inputs are read with volatile loads through the addrspace(2) pointers.
13 %val0 = load volatile i32, i32 addrspace(2)* %in0
14 %val1 = load volatile i32, i32 addrspace(2)* %in1
  ; Keep only the low 16 bits of each loaded dword.
15 %lo.i = trunc i32 %val0 to i16
16 %hi.i = trunc i32 %val1 to i16
  ; Reinterpret the 16-bit integers as half values (no conversion, bit pattern kept).
17 %lo = bitcast i16 %lo.i to half
18 %hi = bitcast i16 %hi.i to half
  ; Build the vector: %lo at index 0, %hi at index 1.
19 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
20 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
21 %vec.i32 = bitcast <2 x half> %vec.1 to i32
22
  ; The side-effecting asm takes the packed value through an "s" constraint;
  ; the check lines above this function expect it in an s-register.
23 call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
24 ret void
25}
26
27; GCN-LABEL: {{^}}s_pack_v2f16_imm_lo:
28; GFX9: s_load_dword [[VAL1:s[0-9]+]]
29; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
30; GFX9: ; use [[PACKED]]
; Variant where element 0 of the <2 x half> is the constant half 0xH1234 and
; element 1 comes from the low 16 bits of a loaded i32. The packed i32 is
; consumed by inline asm through an "s" constraint.
31define void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
  ; Unlike @s_pack_v2f16, this load is not volatile.
32 %val1 = load i32, i32 addrspace(2)* %in1
  ; Low 16 bits of the loaded value, reinterpreted as half.
33 %hi.i = trunc i32 %val1 to i16
34 %hi = bitcast i16 %hi.i to half
  ; Index 0 holds the constant, index 1 holds the loaded half.
35 %vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
36 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
37 %vec.i32 = bitcast <2 x half> %vec.1 to i32
38
  ; Side-effecting asm keeps the packed value live as an "s"-constrained operand.
39 call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
40 ret void
41}
42
43; GCN-LABEL: {{^}}s_pack_v2f16_imm_hi:
44; GFX9: s_load_dword [[VAL0:s[0-9]+]]
45; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
46; GFX9: ; use [[PACKED]]
; Variant where element 0 of the <2 x half> comes from the low 16 bits of a
; loaded i32 and element 1 is the constant half 0xH1234. The packed i32 is
; consumed by inline asm through an "s" constraint.
47define void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
  ; Unlike @s_pack_v2f16, this load is not volatile.
48 %val0 = load i32, i32 addrspace(2)* %in0
  ; Low 16 bits of the loaded value, reinterpreted as half.
49 %lo.i = trunc i32 %val0 to i16
50 %lo = bitcast i16 %lo.i to half
  ; Index 0 holds the loaded half, index 1 holds the constant.
51 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
52 %vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
53 %vec.i32 = bitcast <2 x half> %vec.1 to i32
54
  ; Side-effecting asm keeps the packed value live as an "s"-constrained operand.
55 call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
56 ret void
57}
58
59; GCN-LABEL: {{^}}v_pack_v2f16:
60; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
61; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
62; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]]
63
64; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
65; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
66; GFX9: ; use [[PACKED]]
; Same packing as @s_pack_v2f16, but each input is loaded from an address
; indexed by the workitem id, and the packed i32 is consumed by inline asm
; through a "v" constraint.
67define void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
  ; Index both input pointers by the sign-extended workitem id.
68 %tid = call i32 @llvm.amdgcn.workitem.id.x()
69 %tid.ext = sext i32 %tid to i64
70 %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
71 %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  ; Volatile loads of the two source dwords.
72 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
73 %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Keep the low 16 bits of each and reinterpret them as half.
74 %lo.i = trunc i32 %val0 to i16
75 %hi.i = trunc i32 %val1 to i16
76 %lo = bitcast i16 %lo.i to half
77 %hi = bitcast i16 %hi.i to half
  ; Build the vector: %lo at index 0, %hi at index 1.
78 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
79 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
80 %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The check lines above this function expect the asm operand in a v-register.
81 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
82 ret void
83}
84
85; GCN-LABEL: {{^}}v_pack_v2f16_user:
86; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
87; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
88; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]]
89
90; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
91; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
92
93; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
; Same packing as @v_pack_v2f16, but the packed i32 is consumed by ordinary IR
; (an integer add followed by a volatile store) instead of inline asm.
94define void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
  ; Index both input pointers by the sign-extended workitem id.
95 %tid = call i32 @llvm.amdgcn.workitem.id.x()
96 %tid.ext = sext i32 %tid to i64
97 %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
98 %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  ; Volatile loads of the two source dwords.
99 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
100 %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Keep the low 16 bits of each and reinterpret them as half.
101 %lo.i = trunc i32 %val0 to i16
102 %hi.i = trunc i32 %val1 to i16
103 %lo = bitcast i16 %lo.i to half
104 %hi = bitcast i16 %hi.i to half
  ; Build the vector: %lo at index 0, %hi at index 1.
105 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
106 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
107 %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; Use the packed value as a plain i32: add 9, then store it volatile to an
  ; undef address so the result cannot be dropped.
108 %foo = add i32 %vec.i32, 9
109 store volatile i32 %foo, i32 addrspace(1)* undef
110 ret void
111}
112
113; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo:
114; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
115; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}}
116; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]]
117
118; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
119; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
120; GFX9: ; use [[PACKED]]
; Variant where element 0 is the constant half 0xH1234 and element 1 comes
; from the low 16 bits of a value loaded at a workitem-indexed address. The
; packed i32 is consumed by inline asm through a "v" constraint.
121define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
  ; Index the input pointer by the sign-extended workitem id.
122 %tid = call i32 @llvm.amdgcn.workitem.id.x()
123 %tid.ext = sext i32 %tid to i64
124 %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
125 %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Low 16 bits of the loaded value, reinterpreted as half.
126 %hi.i = trunc i32 %val1 to i16
127 %hi = bitcast i16 %hi.i to half
  ; Index 0 holds the constant, index 1 holds the loaded half.
128 %vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
129 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
130 %vec.i32 = bitcast <2 x half> %vec.1 to i32
131 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
132 ret void
133}
134
135; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo:
136; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
137; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]]
138
139; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
140; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
141
142; GFX9: ; use [[PACKED]]
; Variant where element 0 is the constant half 4.0 and element 1 comes from
; the low 16 bits of a value loaded at a workitem-indexed address. The check
; lines above this function expect 4.0 either as a direct operand or
; materialized as 0x4400, depending on the RUN configuration.
143define void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
  ; Index the input pointer by the sign-extended workitem id.
144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145 %tid.ext = sext i32 %tid to i64
146 %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
147 %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Low 16 bits of the loaded value, reinterpreted as half.
148 %hi.i = trunc i32 %val1 to i16
149 %hi = bitcast i16 %hi.i to half
  ; Index 0 holds the constant 4.0, index 1 holds the loaded half.
150 %vec.0 = insertelement <2 x half> undef, half 4.0, i32 0
151 %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
152 %vec.i32 = bitcast <2 x half> %vec.1 to i32
153 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
154 ret void
155}
156
157; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi:
158; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
159; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234
160; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]]
161
162; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x1234
163; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
164; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
165
166; GFX9: ; use [[PACKED]]
; Variant where element 0 comes from the low 16 bits of a value loaded at a
; workitem-indexed address and element 1 is the constant half 0xH1234. The
; packed i32 is consumed by inline asm through a "v" constraint.
167define void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
  ; Index the input pointer by the sign-extended workitem id.
168 %tid = call i32 @llvm.amdgcn.workitem.id.x()
169 %tid.ext = sext i32 %tid to i64
170 %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
171 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Low 16 bits of the loaded value, reinterpreted as half.
172 %lo.i = trunc i32 %val0 to i16
173 %lo = bitcast i16 %lo.i to half
  ; Index 0 holds the loaded half, index 1 holds the constant.
174 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
175 %vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
176 %vec.i32 = bitcast <2 x half> %vec.1 to i32
177 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
178 ret void
179}
180
181; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi:
182; GFX9: flat_load_dword [[VAL:v[0-9]+]]
183; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0
184
185; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x3c00
186; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
187; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
188
189; GFX9: ; use [[PACKED]]
; Variant where element 0 comes from the low 16 bits of a value loaded at a
; workitem-indexed address and element 1 is the constant half 1.0. The check
; lines above this function expect 1.0 either as a direct operand or
; materialized as 0x3c00, depending on the RUN configuration.
190define void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 {
  ; Index the input pointer by the sign-extended workitem id.
191 %tid = call i32 @llvm.amdgcn.workitem.id.x()
192 %tid.ext = sext i32 %tid to i64
193 %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
194 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Low 16 bits of the loaded value, reinterpreted as half.
195 %lo.i = trunc i32 %val0 to i16
196 %lo = bitcast i16 %lo.i to half
  ; Index 0 holds the loaded half, index 1 holds the constant 1.0.
197 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
198 %vec.1 = insertelement <2 x half> %vec.0, half 1.0, i32 1
199 %vec.i32 = bitcast <2 x half> %vec.1 to i32
200 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
201 ret void
202}
203
204; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_hi:
205; GFX9: flat_load_dword [[VAL:v[0-9]+]]
206; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 64
207
208; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
209; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]]
210
211; GFX9: ; use [[PACKED]]
; Variant where element 0 comes from the low 16 bits of a value loaded at a
; workitem-indexed address and element 1 is the half with bit pattern 0xH0040.
; 0x40 is 64 as an integer, which is the operand the check lines above this
; function expect.
212define void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
  ; Index the input pointer by the sign-extended workitem id.
213 %tid = call i32 @llvm.amdgcn.workitem.id.x()
214 %tid.ext = sext i32 %tid to i64
215 %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
216 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Low 16 bits of the loaded value, reinterpreted as half.
217 %lo.i = trunc i32 %val0 to i16
218 %lo = bitcast i16 %lo.i to half
  ; Index 0 holds the loaded half, index 1 holds the 0xH0040 constant.
219 %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
220 %vec.1 = insertelement <2 x half> %vec.0, half 0xH0040, i32 1
221 %vec.i32 = bitcast <2 x half> %vec.1 to i32
222 call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
223 ret void
224}
225
; Intrinsic called by the v_pack_* functions to obtain %tid, the per-workitem
; index used to offset their input pointers.
226declare i32 @llvm.amdgcn.workitem.id.x() #1
227
; Attribute group #0 is attached to every function defined in this file and to
; the inline asm call sites; #1 is attached to the intrinsic declaration.
228attributes #0 = { nounwind }
229attributes #1 = { nounwind readnone }