blob: 005cfe6f955eb7276a605967beda14ea77f91d21 [file] [log] [blame]
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
Matt Arsenaultc5559bb2013-11-15 04:42:23 +00002
;
; kernel void combine_vloads(global char8* src, global char8* result) {
;   for (int i = 0; i < 1024; ++i)
;     result[i] = src[0] + src[1] + src[2] + src[3];
; }
;
9
10
; Expect 128-bit loads instead of many 8-bit ones.
; EG-LABEL: {{^}}combine_vloads:
; EG: VTX_READ_128
; EG: VTX_READ_128
; Test kernel. On each of 1024 loop iterations it loads 32 bytes from %src as
; one <8 x i32>, splits them into four <8 x i8> groups, adds the groups
; together, and stores the 8-byte sum to element %i.01 of %result.
; The source address is the same on every iteration, but the load is issued
; inside the loop body.
define amdgpu_kernel void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
entry:
  br label %for.body

for.exit:                                         ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  ; Loop counter: 0 when coming from entry, %tmp19 (%i.01 + 1) on the back edge.
  %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]

  ; Reinterpret %src so that all 32 bytes are fetched by a single wide load.
  %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
  %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
  %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
  %1 = bitcast <8 x i32> %vecload2 to <32 x i8>

  ; Extract bytes 0-7, 8-15, 16-23 and 24-31 and accumulate them lane by lane.
  %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
  %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
  %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %tmp17 = add nsw <8 x i8> %tmp13, %tmp16

  ; Write the 8-byte sum to element %i.01 of %result, stored as <2 x i32>.
  %scevgep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %result, i32 %i.01
  %2 = bitcast <8 x i8> %tmp17 to <2 x i32>
  %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
  store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8

  ; Advance the counter and leave the loop once it reaches 1024.
  %tmp19 = add nsw i32 %i.01, 1
  %exitcond = icmp eq i32 %tmp19, 1024
  br i1 %exitcond, label %for.exit, label %for.body
}