; AMDGPU flat-address-space codegen tests (git-blame scrape artifacts removed).
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
9
; A flat pointer selected by a phi from two different source address
; spaces (local vs. global) must still be stored through with a flat
; instruction, since the origin is unknown at the store.
; CHECK-LABEL: {{^}}branch_use_flat_i32:
; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
; CHECK: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  ; Pick a side based on the runtime value of %c.
  %take_local = icmp ne i32 %c, 0
  br i1 %take_local, label %local, label %global

local:
  ; Flat pointer originating from LDS (address space 3).
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  ; Flat pointer originating from global memory (address space 1).
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %flat_ptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store i32 %x, i32 addrspace(4)* %flat_ptr, align 4
; %val = load i32, i32 addrspace(4)* %flat_ptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
33
34
35
; These testcases might become useless when there are optimizations to
; remove generic pointers.
38
; Store of a 32-bit value through a flat pointer derived from a global
; pointer: the address pair and the data are moved into VGPRs, then a
; single flat_store_dword is emitted.
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  ; Cast global (addrspace 1) to flat (addrspace 4) before the store.
  %flat_ptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %flat_ptr, align 4
  ret void
}
49
; 64-bit flat store should select the dwordx2 form.
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  ; Cast global (addrspace 1) to flat (addrspace 4) before the store.
  %flat_ptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  store i64 %x, i64 addrspace(4)* %flat_ptr, align 8
  ret void
}
57
; 128-bit vector flat store should select the dwordx4 form.
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  ; Cast global (addrspace 1) to flat (addrspace 4) before the store.
  %flat_ptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  store <4 x i32> %x, <4 x i32> addrspace(4)* %flat_ptr, align 16
  ret void
}
65
; Truncating i32 -> i16 before a flat store selects flat_store_short.
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %flat_ptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %lo16 = trunc i32 %x to i16
  store i16 %lo16, i16 addrspace(4)* %flat_ptr, align 2
  ret void
}
74
; Truncating i32 -> i8 before a flat store selects flat_store_byte.
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %flat_ptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %lo8 = trunc i32 %x to i8
  ; NOTE(review): align 2 on an i8 store looks copy-pasted from the i16
  ; test; it is legal (over-alignment) and preserved here unchanged.
  store i8 %lo8, i8 addrspace(4)* %flat_ptr, align 2
  ret void
}
83
84
85
; Flat load of an i32 selects flat_load_dword.
; FIX: "CHECK-LABEL @load_flat_i32:" was missing the colon after the
; prefix, so FileCheck silently ignored the directive; use the proper
; "CHECK-LABEL:" form like the other tests in this file.
; CHECK-LABEL: {{^}}load_flat_i32:
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  %fload = load i32, i32 addrspace(4)* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}
94
; Flat load of an i64 selects flat_load_dwordx2.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}load_flat_i64:
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  %fload = load i64, i64 addrspace(4)* %fptr, align 4
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}
103
; Flat load of a <4 x i32> selects flat_load_dwordx4.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}load_flat_v4i32:
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
112
; Sign-extending i8 flat load selects flat_load_sbyte.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}sextload_flat_i8:
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8, i8 addrspace(4)* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
122
; Zero-extending i8 flat load selects flat_load_ubyte.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}zextload_flat_i8:
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8, i8 addrspace(4)* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
132
; Sign-extending i16 flat load selects flat_load_sshort.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}sextload_flat_i16:
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16, i16 addrspace(4)* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
142
; Zero-extending i16 flat load selects flat_load_ushort.
; FIX: malformed "CHECK-LABEL @..." directive (missing colon) replaced
; with the proper "CHECK-LABEL:" form so FileCheck actually checks it.
; CHECK-LABEL: {{^}}zextload_flat_i16:
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16, i16 addrspace(4)* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
152
153
154
; TODO: This should not be zero when registers are used for small
; scratch allocations again.

; Check for prologue initializing special SGPRs pointing to scratch.
; A flat pointer derived from a private (scratch) alloca must round-trip
; through flat store/load across a barrier.
; CHECK-LABEL: {{^}}store_flat_scratch:
; CHECK: s_movk_i32 flat_scratch_lo, 0
; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
; CHECK: flat_store_dword
; CHECK: s_barrier
; CHECK: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %tid = call i32 @llvm.r600.read.tidig.x() #3
  ; Per-thread slot in the scratch array, cast to a flat pointer.
  %slot = getelementptr i32, i32* %alloca, i32 %tid
  %flat_ptr = addrspacecast i32* %slot to i32 addrspace(4)*
  store i32 %tid, i32 addrspace(4)* %flat_ptr
  ; Dummy call
  call void @llvm.AMDGPU.barrier.local() #1
  %reload = load i32, i32 addrspace(4)* %flat_ptr, align 4
  store i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}
178
; Intrinsics used by the tests above; attribute group numbering is
; referenced by the function bodies and must stay stable.
declare void @llvm.AMDGPU.barrier.local() #1
declare i32 @llvm.r600.read.tidig.x() #3

attributes #0 = { nounwind }
attributes #1 = { nounwind noduplicate }
attributes #3 = { nounwind readnone }