; blob: 86055413d2cf64899e52a14975e230ff25d18f30
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


; Scalar case: load one i32 through an addrspace(3) pointer and store it back
; through another addrspace(3) pointer.
; The GCN checks expect m0 to be set to -1 and then a ds_read_b32, with no
; s_wqm_b64 appearing before the m0 setup. The EG check expects LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(3)* %in
  store i32 %ld, i32 addrspace(3)* %out
  ret void
}

; <2 x i32> case: the whole 64-bit vector is loaded from addrspace(3) and
; stored back. The GCN check expects a ds_read_b64 to be emitted.
; FUNC-LABEL: {{^}}local_load_v2i32:
; GCN: ds_read_b64
define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
  ret void
}

; <3 x i32> case: load and store a three-element vector in addrspace(3).
; The GCN checks expect both a ds_read_b64 and a ds_read_b32; the -DAG form
; lets them appear in either order.
; FUNC-LABEL: {{^}}local_load_v3i32:
; GCN-DAG: ds_read_b64
; GCN-DAG: ds_read_b32
define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
  ret void
}

; <4 x i32> case: load and store a four-element vector in addrspace(3).
; The GCN check expects one ds_read2_b64 with offset1:1 and nothing after it
; on that line (no offset0 is printed in the expected text).
; FUNC-LABEL: {{^}}local_load_v4i32:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
  ret void
}

; <8 x i32> case: load and store an eight-element vector in addrspace(3).
; The GCN checks expect two ds_read2_b64 instructions, one using offsets 2/3
; and one using offset1:1, in either order.
; FUNC-LABEL: {{^}}local_load_v8i32:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
  ret void
}

; <16 x i32> case: load and store a sixteen-element vector in addrspace(3).
; The GCN checks expect four ds_read2_b64 instructions (offset pairs 6/7, 4/5,
; 2/3 and offset1:1) and four ds_write2_b64 instructions with the same offset
; pairs. All of them are -DAG checks, so their relative order is not fixed.
; FUNC-LABEL: {{^}}local_load_v16i32:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
  ret void
}

; Zero-extending load: an i32 read from addrspace(3) is widened to i64 with
; zext and the i64 is stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Sign-extending load: an i32 read from addrspace(3) is widened to i64 with
; sext and the i64 is stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Single-element vector variant of the zero-extending load: <1 x i32> is
; loaded from addrspace(3), zero-extended to <1 x i64> and stored.
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Single-element vector variant of the sign-extending load: <1 x i32> is
; loaded from addrspace(3), sign-extended to <1 x i64> and stored.
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Two-element zero-extending load: <2 x i32> from addrspace(3) is widened to
; <2 x i64> with zext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Two-element sign-extending load: <2 x i32> from addrspace(3) is widened to
; <2 x i64> with sext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Four-element zero-extending load: <4 x i32> from addrspace(3) is widened to
; <4 x i64> with zext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Four-element sign-extending load: <4 x i32> from addrspace(3) is widened to
; <4 x i64> with sext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Eight-element zero-extending load: <8 x i32> from addrspace(3) is widened to
; <8 x i64> with zext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Eight-element sign-extending load: <8 x i32> from addrspace(3) is widened to
; <8 x i64> with sext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Sixteen-element sign-extending load: <16 x i32> from addrspace(3) is widened
; to <16 x i64> with sext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Sixteen-element zero-extending load: <16 x i32> from addrspace(3) is widened
; to <16 x i64> with zext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; The label pattern ends with ':' like the other labels in this file so that
; it cannot match a longer symbol name that merely starts with this one.
; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64:
define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Thirty-two-element sign-extending load: <32 x i32> from addrspace(3) is
; widened to <32 x i64> with sext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Thirty-two-element zero-extending load: <32 x i32> from addrspace(3) is
; widened to <32 x i64> with zext and stored to addrspace(3).
; Only the function label is checked; no instruction patterns are asserted.
; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Attribute group #0, referenced by the kernel definitions in this file:
; marks them nounwind.
attributes #0 = { nounwind }