; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Test that when extracting the same unknown vector index from an
; insertelement the dynamic indexing is folded away.

; Work-item id intrinsic; each kernel below uses it to index its input and
; output pointers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; No dynamic indexing required
; The insertelement and the extractelement below use the same dynamic index
; %idx, so the stored value is expected to be %val itself. The checks require
; the scalar-loaded value to be moved into a vector register and stored, with
; no buffer load and no other use of either register in between.
; GCN-LABEL: {{^}}extract_insert_same_dynelt_v4i32:
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
; GCN-NOT: buffer_load_dword
; GCN-NOT: [[VAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @extract_insert_same_dynelt_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %id.ext
  %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %id.ext
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in
  ; Insert and extract at the same unknown index.
  %insert = insertelement <4 x i32> %vec, i32 %val, i32 %idx
  %extract = extractelement <4 x i32> %insert, i32 %idx
  store i32 %extract, i32 addrspace(1)* %gep.out
  ret void
}

; The insert index (%idx0) and the extract index (%idx1) are different
; values here, so the checks expect the vector load plus both an indirect
; write (v_movreld) and an indirect read (v_movrels) before the store.
; GCN-LABEL: {{^}}extract_insert_different_dynelt_v4i32:
; GCN: buffer_load_dwordx4
; GCN: v_movreld_b32
; GCN: v_movrels_b32
; GCN: buffer_store_dword v
define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx0, i32 %idx1) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %id.ext
  %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %id.ext
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in
  ; Insert at %idx0 but extract at %idx1.
  %insert = insertelement <4 x i32> %vec, i32 %val, i32 %idx0
  %extract = extractelement <4 x i32> %insert, i32 %idx1
  store i32 %extract, i32 addrspace(1)* %gep.out
  ret void
}

; Same-index insert/extract: the checks require the scalar-loaded value to be
; moved into a vector register and stored, with no buffer load and no other
; use of either register in between.
; NOTE(review): the function name suggests a constant element index of 2, but
; the body uses the dynamic %idx for both operations, exactly like
; extract_insert_same_dynelt_v4i32 -- confirm which was intended.
; GCN-LABEL: {{^}}extract_insert_same_elt2_v4i32:
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
; GCN-NOT: buffer_load_dword
; GCN-NOT: [[VAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @extract_insert_same_elt2_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %val, i32 %idx) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in, i64 %id.ext
  %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %id.ext
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in
  ; Insert and extract at the same index.
  %insert = insertelement <4 x i32> %vec, i32 %val, i32 %idx
  %extract = extractelement <4 x i32> %insert, i32 %idx
  store i32 %extract, i32 addrspace(1)* %gep.out
  ret void
}

; Float variant of the same-dynamic-index insert/extract test: the stored
; value is expected to be %val, moved from a scalar register into a vector
; register with no other use of either register in between.
; NOTE(review): the "buffer_load_dword" line below has no colon after its
; check prefix, so FileCheck does not treat it as a directive. It is left
; as-is on purpose: the vector load in this function is volatile, so a
; buffer load is presumably still emitted and enabling the check would
; likely fail -- confirm against llc output before changing it.
; GCN-LABEL: {{^}}extract_insert_same_dynelt_v4f32:
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd{{$}}
; GCN-NOT buffer_load_dword
; GCN-NOT: [[VAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @extract_insert_same_dynelt_v4f32(float addrspace(1)* %out, <4 x float> addrspace(1)* %in, float %val, i32 %idx) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep.in = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in, i64 %id.ext
  %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %id.ext
  ; Unlike the integer tests, this vector load is volatile.
  %vec = load volatile <4 x float>, <4 x float> addrspace(1)* %gep.in
  %insert = insertelement <4 x float> %vec, float %val, i32 %idx
  %extract = extractelement <4 x float> %insert, i32 %idx
  store float %extract, float addrspace(1)* %gep.out
  ret void
}

; #0 is attached to the workitem-id intrinsic declaration, #1 to the kernels.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }