; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s

; Extra runs with -amdgpu-ds128 enabled, used to check that ds_read_b128 and ds_write_b128 are selected.
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
Farhana Aleena7cb3112018-03-09 17:41:39 +000010
; Copies one double from %in to %out, both addrspace(3) pointers.
; Expectations, per check prefix:
;  - SICIVI runs: an s_mov_b32 writing m0 appears after the function label.
;    (The prefix is spelled SICIVI so that it matches the prefix list handed
;    to FileCheck; a prefix that no run enables is silently ignored.)
;  - GFX9 run: no mention of m0 before the DS read.
;  - All GCN runs: one ds_read_b64 whose result register pair is the data
;    operand of the following ds_write_b64.
;  - EG run: two LDS_READ_RET operations.
; FUNC-LABEL: {{^}}local_load_f64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
  %ld = load double, double addrspace(3)* %in
  store double %ld, double addrspace(3)* %out
  ret void
}
25
; Copies a <2 x double> from %in to %out, both addrspace(3) pointers.
; Expectations, per check prefix:
;  - SICIVI runs: an s_mov_b32 writing m0 appears after the function label.
;    (Spelled SICIVI to match the prefix list handed to FileCheck; an
;    unknown prefix would be silently ignored.)
;  - GFX9 run: no mention of m0 before the DS read.
;  - All GCN runs: a ds_read2_b64.
;  - EG run: four LDS_READ_RET operations.
; FUNC-LABEL: {{^}}local_load_v2f64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x double>, <2 x double> addrspace(3)* %in
  store <2 x double> %ld, <2 x double> addrspace(3)* %out
  ret void
}
42
; Copies a <3 x double> from %in to %out, both addrspace(3) pointers.
; Expectations, per check prefix:
;  - SICIVI runs: an s_mov_b32 writing m0 appears after the function label.
;    (Spelled SICIVI to match the prefix list handed to FileCheck; an
;    unknown prefix would be silently ignored.)
;  - GFX9 run: no mention of m0 before the DS reads.
;  - All GCN runs: one ds_read2_b64 and one ds_read_b64, in either order.
;  - EG run: six LDS_READ_RET operations.
; FUNC-LABEL: {{^}}local_load_v3f64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64
; GCN-DAG: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x double>, <3 x double> addrspace(3)* %in
  store <3 x double> %ld, <3 x double> addrspace(3)* %out
  ret void
}
62
; Copies a <4 x double> from %in to %out, both addrspace(3) pointers.
; Expectations, per check prefix:
;  - SICIVI runs: an s_mov_b32 writing m0 appears after the function label.
;    (Spelled SICIVI to match the prefix list handed to FileCheck; an
;    unknown prefix would be silently ignored.)
;  - GFX9 run: no mention of m0 before the DS reads.
;  - All GCN runs: two ds_read2_b64 instructions.
;  - EG run: eight LDS_READ_RET operations.
; FUNC-LABEL: {{^}}local_load_v4f64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64
; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x double>, <4 x double> addrspace(3)* %in
  store <4 x double> %ld, <4 x double> addrspace(3)* %out
  ret void
}
85
86; FUNC-LABEL: {{^}}local_load_v8f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000087; SICIV: s_mov_b32 m0
88; GFX9-NOT: m0
89
Matt Arsenaultd1097a32016-06-02 19:54:26 +000090; GCN: ds_read2_b64
91; GCN: ds_read2_b64
92; GCN: ds_read2_b64
93; GCN: ds_read2_b64
94
95; EG: LDS_READ_RET
96; EG: LDS_READ_RET
97; EG: LDS_READ_RET
98; EG: LDS_READ_RET
99; EG: LDS_READ_RET
100; EG: LDS_READ_RET
101; EG: LDS_READ_RET
102; EG: LDS_READ_RET
103; EG: LDS_READ_RET
104; EG: LDS_READ_RET
105; EG: LDS_READ_RET
106; EG: LDS_READ_RET
107; EG: LDS_READ_RET
108; EG: LDS_READ_RET
109; EG: LDS_READ_RET
110; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000111define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000112entry:
113 %ld = load <8 x double>, <8 x double> addrspace(3)* %in
114 store <8 x double> %ld, <8 x double> addrspace(3)* %out
115 ret void
116}
117
; Copies a <16 x double> from %in to %out, both addrspace(3) pointers.
; Expectations, per check prefix:
;  - SICIVI runs: an s_mov_b32 writing m0 appears after the function label.
;    (Spelled SICIVI to match the prefix list handed to FileCheck; an
;    unknown prefix would be silently ignored.)
;  - GFX9 run: no mention of m0 before the DS reads.
;  - All GCN runs: eight ds_read2_b64 instructions.
;  - EG run: thirty-two LDS_READ_RET operations (listed in groups of four).
; FUNC-LABEL: {{^}}local_load_v16f64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x double>, <16 x double> addrspace(3)* %in
  store <16 x double> %ld, <16 x double> addrspace(3)* %out
  ret void
}
176
; 128-bit DS access test. The <2 x double> load and store below are both
; marked "align 16"; the runs that pass -amdgpu-ds128 are expected to select
; ds_read_b128 for the load and ds_write_b128 for the store. The EG run
; expects four LDS_READ_RET operations.
; FUNC-LABEL: {{^}}local_load_v2f64_to_128:

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v2f64_to_128(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) {
bb:
  %vec = load <2 x double>, <2 x double> addrspace(3)* %in, align 16
  store <2 x double> %vec, <2 x double> addrspace(3)* %out, align 16
  ret void
}
193
; Attribute group referenced as #0 by kernel definitions in this file.
attributes #0 = { nounwind }