; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Adds two <2 x i16> values that are loaded (volatile) from addrspace(1)
; pointers indexed by the workitem id. The gfx9 checks expect one packed
; v_pk_add_u16; the VI checks expect an SDWA add followed by a plain
; 16-bit add.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %gep.in1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %b = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in1
  %add = add <2 x i16> %a, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds two <2 x i16> values loaded from addrspace(2) pointers that are used
; directly (no per-workitem indexing). The gfx9 checks expect two scalar
; s_load_dword results, one of them copied to a VGPR, feeding a single
; v_pk_add_u16; the VI checks expect two s_add_i32.
; GCN-LABEL: {{^}}s_test_add_v2i16:
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]]
; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]]

; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
  %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
  %b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
  %add = add <2 x i16> %a, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds a <2 x i16> value, loaded once from an addrspace(2) pointer, to
; itself. The gfx9 checks expect the same scalar register in both source
; operands of v_pk_add_u16; the VI checks expect two s_add_i32.
; GCN-LABEL: {{^}}s_test_add_self_v2i16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL]], [[VAL]]

; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
  %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
  %add = add <2 x i16> %a, %a
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds two <2 x i16> values passed by value as kernel arguments. The gfx9
; checks expect a v_pk_add_u16 with one scalar and one vector source; the
; VI checks expect v_add_i32 followed by v_add_i32_sdwa.
; FIXME: VI should not scalarize arg access.
; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; VI: v_add_i32
; VI: v_add_i32_sdwa
define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
  %add = add <2 x i16> %a, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds the constant vector <123, 456> to a loaded <2 x i16>. Packed into
; 32 bits the constant is (456 << 16) | 123 == 0x1c8007b, which the gfx9
; checks expect to be materialized in an SGPR. The VI checks expect the
; low half (0x7b) as a literal operand and the high half (0x1c8) moved to
; a VGPR for the SDWA add.
; GCN-LABEL: {{^}}v_test_add_v2i16_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]

; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %add = add <2 x i16> %a, <i16 123, i16 456>
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds the constant vector <-845, -991> to a loaded <2 x i16>. As 16-bit
; patterns these are 0xfcb3 and 0xfc21, so the packed 32-bit constant the
; gfx9 checks expect is 0xfc21fcb3. The VI checks expect each half as a
; sign-extended 32-bit literal (0xfffffcb3 and 0xfffffc21).
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_neg_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]

; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %add = add <2 x i16> %a, <i16 -845, i16 -991>
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds the splat constant <-1, -1> to a loaded <2 x i16>. The gfx9 checks
; expect -1 to appear directly as an operand of v_pk_add_u16. The VI
; checks expect -1 moved into a VGPR for the SDWA add, -1 as a direct
; operand of the plain 16-bit add, and the two halves combined with
; v_or_b32.
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}

; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %add = add <2 x i16> %a, <i16 -1, i16 -1>
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds <32, 0> to a loaded <2 x i16>: only the low element changes. The
; packed constant is simply 32, which the gfx9 checks expect in an SGPR.
; The VI checks expect exactly one 16-bit add (for the low half), then a
; shift left by 16 and an OR to rebuild the packed value.
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_lo_zero_hi:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}

; VI-NOT: v_add_u16
; VI: v_add_u16_e32 v{{[0-9]+}}, 32, v{{[0-9]+}}
; VI-NOT: v_add_u16
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %add = add <2 x i16> %a, <i16 32, i16 0>
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds <0, 16256> to a loaded <2 x i16>: only the high element changes.
; 16256 is 0x3f80, so the packed constant is 0x3f800000, which the gfx9
; checks expect to be printed as the float literal 1.0. The VI checks
; expect exactly one 16-bit add (SDWA, for the high half) using 0x3f80
; from a VGPR.
; The high element gives fp
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_fp_split:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}

; VI-NOT: v_add_u16
; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_add_u16
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %add = add <2 x i16> %a, <i16 0, i16 16256>
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds two loaded <2 x i16> values and zero-extends the sum to <2 x i32>.
; The gfx9 checks expect one v_pk_add_u16 whose result is split with an
; AND by 0xffff (element 0) and a logical shift right by 16 (element 1).
; The VI checks expect four 16-bit loads and two 16-bit adds whose results
; are stored directly, with no and/shl instructions in between.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i32:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}

; VI: flat_load_ushort v[[A_LO:[0-9]+]]
; VI: flat_load_ushort v[[A_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]

; VI: v_add_u16_e32 v[[ADD_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; VI-NOT: and
; VI-NOT: shl
; VI: v_add_u16_e32 v[[ADD_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; VI-NOT: and
; VI-NOT: shl
; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %gep.in1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %b = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in1
  %add = add <2 x i16> %a, %b
  %ext = zext <2 x i16> %add to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Adds two loaded <2 x i16> values and zero-extends the sum to <2 x i64>.
; The gfx9 checks expect one v_pk_add_u16 split with an AND by 0xffff and
; a logical shift right by 16 before a 4-dword store. The VI checks expect
; a zero materialized with v_mov_b32, four 16-bit loads, two 16-bit adds,
; and a 4-dword store.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx4

; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
; VI: flat_load_ushort v[[A_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]

; VI-DAG: v_add_u16_e32
; VI-DAG: v_add_u16_e32

; VI: buffer_store_dwordx4
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %gep.in1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %b = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in1
  %add = add <2 x i16> %a, %b
  %ext = zext <2 x i16> %add to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Adds two loaded <2 x i16> values and sign-extends the sum to <2 x i32>.
; The gfx9 checks expect one v_pk_add_u16 whose result is split with a
; 16-bit signed bitfield extract (element 0) and an arithmetic shift right
; by 16 (element 1). The VI checks expect two 16-bit adds, each followed
; by a 16-bit signed bitfield extract.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i32:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16
; GFX9-DAG: v_ashrrev_i32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}

; VI: v_add_u16_e32
; VI: v_add_u16_e32
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: buffer_store_dwordx2
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %gep.in1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %tid
  %a = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %b = load volatile <2 x i16>, <2 x i16> addrspace(1)* %gep.in1
  %add = add <2 x i16> %a, %b
  %ext = sext <2 x i16> %add to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Adds two loaded <2 x i16> values and sign-extends the sum to <2 x i64>.
; Both targets are expected to use dword loads (flat or global). The gfx9
; checks expect v_pk_add_u16 plus a shift right by 16; the VI checks
; expect an SDWA add and a plain 16-bit add. Both then expect two 16-bit
; signed bitfield extracts and two arithmetic shifts right by 31 that
; produce the high dwords of the 64-bit results.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i64:
; GCN: {{flat|global}}_load_dword
; GCN: {{flat|global}}_load_dword

; GFX9: v_pk_add_u16
; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: v_add_u16_sdwa
; VI: v_add_u16_e32

; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note that %gep.out is computed but never used; the store below writes
  ; through %out itself.
  %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
  %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
  %gep.in1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %tid
  ; These two loads are plain (not volatile).
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep.in0
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %gep.in1
  %add = add <2 x i16> %a, %b
  %ext = sext <2 x i16> %add to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Intrinsic called by the v_test_* kernels to obtain the workitem id that
; indexes their input pointers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group 0 is attached to the intrinsic declaration; group 1 is
; attached to every kernel defined in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }