; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s


; Both elements of the <2 x half> are the low 16 bits of volatile i32 loads
; through addrspace(2) pointers. The packed value is consumed by inline asm
; with an "s" constraint; on gfx900 the checks expect a single
; s_pack_ll_b32_b16 whose operands are the two loaded registers.
; GCN-LABEL: {{^}}s_pack_v2f16:
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
  %val0 = load volatile i32, i32 addrspace(2)* %in0
  %val1 = load volatile i32, i32 addrspace(2)* %in1
  %lo.i = trunc i32 %val0 to i16
  %hi.i = trunc i32 %val1 to i16
  %lo = bitcast i16 %lo.i to half
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32

  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the constant half 0xH1234 and element 1 is the low 16 bits of
; an i32 loaded through an addrspace(2) pointer. On gfx900 the checks expect
; the constant to appear as the literal 0x1234 in the first source operand of
; s_pack_ll_b32_b16.
; GCN-LABEL: {{^}}s_pack_v2f16_imm_lo:
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
  %val1 = load i32, i32 addrspace(2)* %in1
  %hi.i = trunc i32 %val1 to i16
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32

  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the low 16 bits of an i32 loaded through an addrspace(2)
; pointer and element 1 is the constant half 0xH1234. On gfx900 the checks
; expect the constant to appear as the literal 0x1234 in the second source
; operand of s_pack_ll_b32_b16.
; GCN-LABEL: {{^}}s_pack_v2f16_imm_hi:
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
  %val0 = load i32, i32 addrspace(2)* %in0
  %lo.i = trunc i32 %val0 to i16
  %lo = bitcast i16 %lo.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32

  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
  ret void
}

; Both elements are the low 16 bits of volatile i32 loads from addrspace(1),
; indexed by the workitem id. The packed value is consumed by inline asm with
; a "v" constraint. On gfx900 the checks expect the low element to be masked
; with 0xffff and then combined with the high element by v_lshl_or_b32 using
; a shift of 16.
; GCN-LABEL: {{^}}v_pack_v2f16:
; GFX9: global_load_dword [[VAL0:v[0-9]+]]
; GFX9: global_load_dword [[VAL1:v[0-9]+]]

; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  %lo.i = trunc i32 %val0 to i16
  %hi.i = trunc i32 %val1 to i16
  %lo = bitcast i16 %lo.i to half
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Same packing of two workitem-indexed addrspace(1) loads as the inline asm
; variant, but here the packed i32 feeds an integer add of 9 whose result is
; stored with a volatile store. On gfx900 the checks expect the add to read
; the register produced by v_lshl_or_b32.
; GCN-LABEL: {{^}}v_pack_v2f16_user:
; GFX9: global_load_dword [[VAL0:v[0-9]+]]
; GFX9: global_load_dword [[VAL1:v[0-9]+]]

; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 9, [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  %lo.i = trunc i32 %val0 to i16
  %hi.i = trunc i32 %val1 to i16
  %lo = bitcast i16 %lo.i to half
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  %foo = add i32 %vec.i32, 9
  ; The store address is undef; the volatile store only serves as a use of %foo.
  store volatile i32 %foo, i32 addrspace(1)* undef
  ret void
}

; Element 0 is the constant half 0xH1234 and element 1 is the low 16 bits of
; a workitem-indexed volatile load from addrspace(1). On gfx900 the checks
; expect 0x1234 to be moved into a v-register that is then the last operand
; of v_lshl_or_b32. The load and the move are matched in either order.
; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo:
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]]

; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  %hi.i = trunc i32 %val1 to i16
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the constant half 4.0 and element 1 is the low 16 bits of a
; workitem-indexed volatile load from addrspace(1). On gfx900 the checks
; expect the constant to be materialized as 0x4400 in a v-register that is
; then the last operand of v_lshl_or_b32.
; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo:
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]]

; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]

; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
  %val1 = load volatile i32, i32 addrspace(1)* %in1.gep
  %hi.i = trunc i32 %val1 to i16
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half 4.0, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the low 16 bits of a workitem-indexed volatile load from
; addrspace(1) and element 1 is the constant half 0xH1234. On gfx900 the
; checks expect 0x1234 to be loaded into an s-register with s_movk_i32, the
; loaded value to be masked with 0xffff, and v_lshl_or_b32 to shift the
; s-register by 16 and OR in the masked value.
; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi:
; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]]

; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234
; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]

; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
  %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  %lo.i = trunc i32 %val0 to i16
  %lo = bitcast i16 %lo.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the low 16 bits of a workitem-indexed volatile load from
; addrspace(1) and element 1 is the constant half 1.0. On gfx900 the checks
; expect the constant to be materialized as 0x3c00 with s_movk_i32 and used
; as the shifted operand of v_lshl_or_b32, with the loaded value masked by
; 0xffff.
; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi:
; GFX9-DAG: global_load_dword [[VAL:v[0-9]+]]

; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]

; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
  %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  %lo.i = trunc i32 %val0 to i16
  %lo = bitcast i16 %lo.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half 1.0, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Element 0 is the low 16 bits of a workitem-indexed volatile load from
; addrspace(1) and element 1 is the constant half 0xH0040, whose bit pattern
; is the integer 64. On gfx900 the checks expect 64 to appear directly as the
; shifted operand of v_lshl_or_b32, with no separate move of the constant.
; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_hi:
; GFX9: global_load_dword [[VAL:v[0-9]+]]

; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]]

; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
  %val0 = load volatile i32, i32 addrspace(1)* %in0.gep
  %lo.i = trunc i32 %val0 to i16
  %lo = bitcast i16 %lo.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half 0xH0040, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; The asm string is emitted as a comment, which the "; use" check matches.
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Intrinsic called by the v_pack_* kernels to obtain the per-workitem index
; used in their getelementptr address computations.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to every kernel and inline asm call in this file;
; #1 is attached to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }