; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Checks which pairs of addrspace(3) loads are (and are not) merged into
; ds_read2st64_b32 / ds_read2st64_b64 when the load/store optimizer is enabled.

; Backing arrays used by the tests that index a global rather than a pointer
; argument: 512 floats (4-byte aligned) and 512 doubles (8-byte aligned).
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8


; Two float loads from @lds at element indices x and x + 64 (a stride of
; exactly 64 elements) are expected to merge into one ds_read2st64_b32 whose
; second offset is 1. The two halves of the result pair feed the add, and the
; sum is stored to %out[x].
; GCN-LABEL: @simple_read2st64_f32_0_1
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; Second element is exactly one 64-element stride past the first.
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Same as the 0/1 case but through a pointer argument and with both loads
; displaced: element indices x + 64 and x + 128 are expected to merge into one
; ds_read2st64_b32 with offsets 1 and 2.
; GCN-LABEL: @simple_read2st64_f32_1_2
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; 64 = 1 * 64 elements.
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; 128 = 2 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Largest second offset that is still expected to merge for 32-bit elements:
; element indices x + 64 and x + 16320 (= 255 * 64) should become one
; ds_read2st64_b32 with offsets 1 and 255.
; GCN-LABEL: @simple_read2st64_f32_max_offset
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; 16320 = 255 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 16320
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; One stride past the largest mergeable offset: element indices x + 64 and
; x + 16384 (= 256 * 64) must not merge. The expected output is two separate
; ds_read_b32: one with byte offset 256 (64 * 4), and one with no immediate
; offset whose address comes from an add of 0x10000 (16384 * 4 bytes).
; GCN-LABEL: @simple_read2st64_f32_over_max_offset
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b32
; GCN-DAG: v_add_{{i|u}}32_e32 [[BIGADD:v[0-9]+]], {{(vcc, )?}}0x10000, {{v[0-9]+}}
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; 16384 = 256 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 16384
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Element indices x and x + 63: the distance between the loads is not a
; multiple of 64 elements, so no ds_read2st64_b32 may be emitted.
; GCN-LABEL: @odd_invalid_read2st64_f32_0
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b32
; GCN: s_endpgm
define amdgpu_kernel void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; 63 is one element short of a full 64-element stride.
  %add.x = add nsw i32 %x.i, 63
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Element indices x + 64 and x + 127: the first index is a multiple of 64 but
; the second is not, so no ds_read2st64_b32 may be emitted.
; GCN-LABEL: @odd_invalid_read2st64_f32_1
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b32
; GCN: s_endpgm
define amdgpu_kernel void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; 127 = 2 * 64 - 1, i.e. not a multiple of 64.
  %add.x.1 = add nsw i32 %x.i, 127
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; 64-bit variant of the 0/1 case: double loads from @lds.f64 at element
; indices x and x + 64 are expected to merge into one ds_read2st64_b64 whose
; second offset is 1. The low and high parts of the result range feed the
; v_add_f64, and the sum is stored to %out[x].
; GCN-LABEL: @simple_read2st64_f64_0_1
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; Second element is exactly one 64-element stride past the first.
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; 64-bit variant of the 1/2 case through a pointer argument: element indices
; x + 64 and x + 128 are expected to merge into one ds_read2st64_b64 with
; offsets 1 and 2.
; GCN-LABEL: @simple_read2st64_f64_1_2
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}

; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; 64 = 1 * 64 elements.
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; 128 = 2 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Alignment only: the element stride (64 doubles) would qualify, but both
; double loads are only 4-byte aligned. Each load is expected to become its
; own ds_read2_b32 pair instead: dword offsets 0/1 for the first and 128/129
; for the second (64 doubles * 8 bytes = 128 dwords).

; GCN-LABEL: @misaligned_read2st64_f64
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; GCN: s_endpgm
define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  ; Deliberately under-aligned (align 4 on an 8-byte type).
  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  ; Deliberately under-aligned (align 4 on an 8-byte type).
  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff.
; Element indices x + 256 (= 4 * 64) and x + 8128 (= 127 * 64) are expected to
; merge into one ds_read2st64_b64 with offsets 4 and 127; the larger byte
; offset is 127 * 64 * 8 = 65024, which is still below 0xffff.
; GCN-LABEL: @simple_read2st64_f64_max_offset
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}

; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; 256 = 4 * 64 elements.
  %add.x.0 = add nsw i32 %x.i, 256
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; 8128 = 127 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 8128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; One stride past the largest mergeable 64-bit offset: element indices x + 64
; and x + 8192 (= 128 * 64) must not merge. The expected output is two
; separate ds_read_b64: one with byte offset 512 (64 * 8), and one whose
; address comes from an add of 0x10000 (8192 * 8 bytes).
; GCN-LABEL: @simple_read2st64_f64_over_max_offset
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b64
; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; GCN-DAG: v_add_{{i|u}}32_e32 [[BIGADD:v[0-9]+]], {{(vcc, )?}}0x10000, {{v[0-9]+}}
; GCN: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; GCN: s_endpgm
define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; 8192 = 128 * 64 elements.
  %add.x.1 = add nsw i32 %x.i, 8192
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Element indices x + 64 and x + 8129: the second index is 127 * 64 + 1, so
; it is not a multiple of 64 and no ds_read2st64_b64 may be emitted.
; GCN-LABEL: @invalid_read2st64_f64_odd_offset
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b64
; GCN: s_endpgm
define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; 8129 = 127 * 64 + 1 elements.
  %add.x.1 = add nsw i32 %x.i, 8129
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; The stride of 8 elements is 8 * 8 bytes. We need to make sure the
; stride in elements, not bytes, is a multiple of 64.
; Element indices x and x + 8 are therefore expected to merge into a plain
; ds_read2_b64 with a second offset of 8, never into the st64 form. The
; negative check names the real st64 mnemonic (ds_read2st64_b64); the previous
; spelling without "64" matched no instruction and so could never fail.

; GCN-LABEL: @byte_size_only_divisible_64_read2_f64
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_read2st64_b64
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; 8 elements apart: 64 bytes, but far short of a 64-element stride.
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; Work-item id intrinsics. Only the x variant is called by the kernels visible
; in this file; the y declaration is kept as-is.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1

; #0 is attached to the kernel definitions, #1 to the intrinsic declarations
; and their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }