[AMDGPU] Switch to the new addr space mapping by default
This requires a corresponding clang change.
Differential Revision: https://reviews.llvm.org/D40955
llvm-svn: 324101
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
index 1f6cb85..ddb3148 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -1,45 +1,45 @@
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
; CHECK: 'addrspacecast_global_to_flat'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
- %cast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
- ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8*
+define i8* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
+ %cast = addrspacecast i8 addrspace(1)* %ptr to i8*
+ ret i8* %cast
}
; CHECK: 'addrspacecast_global_to_flat_v2'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
- %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
- ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
+ %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+ ret <2 x i8*> %cast
}
; CHECK: 'addrspacecast_global_to_flat_v32'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
- %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
- ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
+ %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+ ret <32 x i8*> %cast
}
; CHECK: 'addrspacecast_local_to_flat'
-; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
- ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8*
+define i8* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %ptr to i8*
+ ret i8* %cast
}
; CHECK: 'addrspacecast_local_to_flat_v2'
-; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
- %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
- ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
+ %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+ ret <2 x i8*> %cast
}
; CHECK: 'addrspacecast_local_to_flat_v32'
-; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
- %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
- ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
+ %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+ ret <32 x i8*> %cast
}
attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll b/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
index 8ad1cbb..0d725a1 100644
--- a/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
+++ b/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
@@ -4,9 +4,9 @@
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: ;;#ASMEND
-define void @foo(i32* %ptr) {
+define void @foo(i32 addrspace(5)* %ptr) {
%tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", "=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2)
%tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0
- store i32 %tmp2, i32* %ptr, align 4
+ store i32 %tmp2, i32 addrspace(5)* %ptr, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 27426fb..6353308 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -35,8 +35,8 @@
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -73,9 +73,9 @@
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
- %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
+ %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -89,8 +89,8 @@
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -101,8 +101,8 @@
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+ %ld = load volatile i32, i32* %stof
ret void
}
@@ -117,8 +117,8 @@
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
@@ -134,9 +134,9 @@
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
- store volatile i32 0, i32* %ftos
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
+ store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}
@@ -148,8 +148,8 @@
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
@@ -159,8 +159,8 @@
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
load volatile i32, i32 addrspace(2)* %ftos
ret void
}
@@ -178,8 +178,8 @@
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(3)* null to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -188,7 +188,7 @@
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
+ %cast = addrspacecast i32* null to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
@@ -199,8 +199,8 @@
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -209,7 +209,7 @@
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
+ %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
@@ -224,8 +224,8 @@
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32* null to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(5)* null to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -233,8 +233,8 @@
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast i32* null to i32 addrspace(5)*
+ store volatile i32 7, i32 addrspace(5)* %cast
ret void
}
@@ -250,17 +250,17 @@
br i1 %cmp, label %local, label %global
local:
- %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+ %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
br label %end
global:
- %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
br label %end
end:
- %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
- store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
-; %val = load i32, i32 addrspace(4)* %fptr, align 4
+ %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
+ store volatile i32 %x, i32* %fptr, align 4
+; %val = load i32, i32* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -278,14 +278,14 @@
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
- %alloca = alloca i32, i32 9, align 4
+ %alloca = alloca i32, i32 9, align 4, addrspace(5)
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
- %pptr = getelementptr i32, i32* %alloca, i32 %x
- %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
- store volatile i32 %x, i32 addrspace(4)* %fptr
+ %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
+ %fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
+ store volatile i32 %x, i32* %fptr
; Dummy call
call void @llvm.amdgcn.s.barrier() #1
- %reload = load volatile i32, i32 addrspace(4)* %fptr, align 4
+ %reload = load volatile i32, i32* %fptr, align 4
store volatile i32 %reload, i32 addrspace(1)* %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
index 1d5c538..048bb30 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -17,13 +17,13 @@
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
- %0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
- store i32 0, i32* %1
- store i32 1, i32* %2
- %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32, i32* %3
+ %0 = alloca [2 x i32], addrspace(5)
+ %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %1
+ store i32 1, i32 addrspace(5)* %2
+ %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
+ %4 = load i32, i32 addrspace(5)* %3
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
index e68ed9c..51d9649 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
@@ -1,9 +1,9 @@
; RUN: opt -mtriple=amdgcn-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
-; CHECK: NoAlias: i8 addrspace(1)* %p1, i8* %p
+; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p
-define void @test(i8* %p, i8 addrspace(1)* %p1) {
+define void @test(i8 addrspace(5)* %p, i8 addrspace(1)* %p1) {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index 8af4b33..9d81cd5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn--amdhsa -O3 -S -amdgpu-function-calls -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s
-; RUN: opt -mtriple=amdgcn--amdhsa -O3 -S -amdgpu-function-calls < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s
+; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s
+; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s
define coldcc float @foo(float %x, float %y) {
entry:
@@ -10,30 +10,30 @@
ret float %cond
}
-define coldcc void @foo_private_ptr(float* nocapture %p) {
+define coldcc void @foo_private_ptr(float addrspace(5)* nocapture %p) {
entry:
- %tmp1 = load float, float* %p, align 4
+ %tmp1 = load float, float addrspace(5)* %p, align 4
%cmp = fcmp ogt float %tmp1, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%div = fdiv float 1.000000e+00, %tmp1
- store float %div, float* %p, align 4
+ store float %div, float addrspace(5)* %p, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
-define coldcc void @foo_private_ptr2(float* nocapture %p1, float* nocapture %p2) {
+define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
- %tmp1 = load float, float* %p1, align 4
+ %tmp1 = load float, float addrspace(5)* %p1, align 4
%cmp = fcmp ogt float %tmp1, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%div = fdiv float 2.000000e+00, %tmp1
- store float %div, float* %p2, align 4
+ store float %div, float addrspace(5)* %p2, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
@@ -46,11 +46,11 @@
ret float %call
}
-define void @foo_noinline(float* nocapture %p) #0 {
+define void @foo_noinline(float addrspace(5)* nocapture %p) #0 {
entry:
- %tmp1 = load float, float* %p, align 4
+ %tmp1 = load float, float addrspace(5)* %p, align 4
%mul = fmul float %tmp1, 2.000000e+00
- store float %mul, float* %p, align 4
+ store float %mul, float addrspace(5)* %p, align 4
ret void
}
@@ -63,7 +63,7 @@
; GCN: tail call float @_Z3sinf(
define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) {
entry:
- %pvt_arr = alloca [64 x float], align 4
+ %pvt_arr = alloca [64 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%tmp2 = load float, float addrspace(1)* %arrayidx, align 4
@@ -72,22 +72,22 @@
%tmp5 = load float, float addrspace(1)* %arrayidx2, align 4
%c1 = tail call coldcc float @foo(float %tmp2, float %tmp5)
%or = or i32 %tid, %n
- %arrayidx5 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %or
- store float %c1, float* %arrayidx5, align 4
- %arrayidx7 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %or
- call coldcc void @foo_private_ptr(float* %arrayidx7)
- %arrayidx8 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
- call void @foo_noinline(float* %arrayidx7)
+ %arrayidx5 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+ store float %c1, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx7 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+ call coldcc void @foo_private_ptr(float addrspace(5)* %arrayidx7)
+ %arrayidx8 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
+ call void @foo_noinline(float addrspace(5)* %arrayidx7)
%and = and i32 %tid, %n
- %arrayidx11 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %and
- %tmp12 = load float, float* %arrayidx11, align 4
+ %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and
+ %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4
%c2 = call coldcc float @sin_wrapper(float %tmp12)
- store float %c2, float* %arrayidx7, align 4
+ store float %c2, float addrspace(5)* %arrayidx7, align 4
%xor = xor i32 %tid, %n
- %arrayidx16 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %xor
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
store float %tmp16, float addrspace(1)* %arrayidx, align 4
ret void
}
@@ -96,23 +96,23 @@
; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
- %pvt_arr1 = alloca [32 x float], align 4
- %pvt_arr2 = alloca [32 x float], align 4
+ %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
+ %pvt_arr2 = alloca [32 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%or = or i32 %tid, %n
- %arrayidx4 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %or
- %arrayidx5 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 %or
- store float %v, float* %arrayidx4, align 4
- store float %v, float* %arrayidx5, align 4
- %arrayidx8 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
+ %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
+ %arrayidx5 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
+ store float %v, float addrspace(5)* %arrayidx4, align 4
+ store float %v, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
%xor = xor i32 %tid, %n
- %arrayidx15 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %xor
- %arrayidx16 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 %xor
- %tmp15 = load float, float* %arrayidx15, align 4
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
+ %arrayidx16 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
+ %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
%tmp17 = fadd float %tmp15, %tmp16
store float %tmp17, float addrspace(1)* %arrayidx, align 4
ret void
@@ -123,23 +123,23 @@
; GCN-INLDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
- %pvt_arr1 = alloca [32 x float], align 4
- %pvt_arr2 = alloca [33 x float], align 4
+ %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
+ %pvt_arr2 = alloca [33 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%or = or i32 %tid, %n
- %arrayidx4 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %or
- %arrayidx5 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 %or
- store float %v, float* %arrayidx4, align 4
- store float %v, float* %arrayidx5, align 4
- %arrayidx8 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
+ %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
+ %arrayidx5 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
+ store float %v, float addrspace(5)* %arrayidx4, align 4
+ store float %v, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
%xor = xor i32 %tid, %n
- %arrayidx15 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %xor
- %arrayidx16 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 %xor
- %tmp15 = load float, float* %arrayidx15, align 4
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
+ %arrayidx16 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
+ %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
%tmp17 = fadd float %tmp15, %tmp16
store float %tmp17, float addrspace(1)* %arrayidx, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index 71c4c83..e48da47 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -5,8 +5,8 @@
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
@@ -80,19 +80,19 @@
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1
define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -102,19 +102,19 @@
; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
define amdgpu_kernel void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [8 x i32], align 16
+ %stack = alloca [8 x i32], align 16, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -127,19 +127,19 @@
; SI-NOT: ds_write
define amdgpu_kernel void @no_replace_inbounds_gep(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -162,20 +162,20 @@
define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
entry:
- %a = alloca %struct.point
- %b = alloca %struct.point
- %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
- %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
- store i32 0, i32* %a.x.ptr
- store i32 1, i32* %a.y.ptr
- store i32 2, i32* %b.x.ptr
- store i32 3, i32* %b.y.ptr
- %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %a.indirect = load i32, i32* %a.indirect.ptr
- %b.indirect = load i32, i32* %b.indirect.ptr
+ %a = alloca %struct.point, addrspace(5)
+ %b = alloca %struct.point, addrspace(5)
+ %a.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
+ %a.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1
+ %b.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
+ %b.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %a.x.ptr
+ store i32 1, i32 addrspace(5)* %a.y.ptr
+ store i32 2, i32 addrspace(5)* %b.x.ptr
+ store i32 3, i32 addrspace(5)* %b.y.ptr
+ %a.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
+ %b.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
+ %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr
+ %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr
%0 = add i32 %a.indirect, %b.indirect
store i32 %0, i32 addrspace(1)* %out
ret void
@@ -191,32 +191,32 @@
define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
- %prv_array_const = alloca [2 x i32]
- %prv_array = alloca [2 x i32]
+ %prv_array_const = alloca [2 x i32], addrspace(5)
+ %prv_array = alloca [2 x i32], addrspace(5)
%a = load i32, i32 addrspace(1)* %in
%b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%b = load i32, i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- store i32 %a, i32* %a_dst_ptr
- %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
- store i32 %b, i32* %b_dst_ptr
+ %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
+ store i32 %a, i32 addrspace(5)* %a_dst_ptr
+ %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1
+ store i32 %b, i32 addrspace(5)* %b_dst_ptr
br label %for.body
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- %x = load i32, i32* %x_ptr
- %y_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %y = load i32, i32* %y_ptr
+ %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
+ %x = load i32, i32 addrspace(5)* %x_ptr
+ %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
+ %y = load i32, i32 addrspace(5)* %y_ptr
%xy = add i32 %x, %y
- store i32 %xy, i32* %y_ptr
+ store i32 %xy, i32 addrspace(5)* %y_ptr
%count = add i32 %inc, 1
%done = icmp eq i32 %count, 4095
br i1 %done, label %for.end, label %for.body
for.end:
- %value_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %value = load i32, i32* %value_ptr
+ %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
+ %value = load i32, i32 addrspace(5)* %value_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -235,13 +235,13 @@
; SI-PROMOTE: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[SCALED_IDX]], 16
define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %0 = alloca [2 x i16]
- %1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 1
- store i16 0, i16* %1
- store i16 1, i16* %2
- %3 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 %index
- %4 = load i16, i16* %3
+ %0 = alloca [2 x i16], addrspace(5)
+ %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1
+ store i16 0, i16 addrspace(5)* %1
+ store i16 1, i16 addrspace(5)* %2
+ %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index
+ %4 = load i16, i16 addrspace(5)* %3
%5 = sext i16 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -258,13 +258,13 @@
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0
define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %0 = alloca [2 x i8]
- %1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 1
- store i8 0, i8* %1
- store i8 1, i8* %2
- %3 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 %index
- %4 = load i8, i8* %3
+ %0 = alloca [2 x i8], addrspace(5)
+ %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %1
+ store i8 1, i8 addrspace(5)* %2
+ %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index
+ %4 = load i8, i8 addrspace(5)* %3
%5 = sext i8 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -281,22 +281,22 @@
; SI: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ;
define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %0 = alloca [3 x i8], align 1
- %1 = alloca [2 x i8], align 1
- %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
- %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
- %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
- %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
- %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
- store i8 0, i8* %2
- store i8 1, i8* %3
- store i8 2, i8* %4
- store i8 1, i8* %5
- store i8 0, i8* %6
- %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
- %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
- %9 = load i8, i8* %7
- %10 = load i8, i8* %8
+ %0 = alloca [3 x i8], align 1, addrspace(5)
+ %1 = alloca [2 x i8], align 1, addrspace(5)
+ %2 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0
+ %3 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1
+ %4 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2
+ %5 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0
+ %6 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %2
+ store i8 1, i8 addrspace(5)* %3
+ store i8 2, i8 addrspace(5)* %4
+ store i8 1, i8 addrspace(5)* %5
+ store i8 0, i8 addrspace(5)* %6
+ %7 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in
+ %8 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in
+ %9 = load i8, i8 addrspace(5)* %7
+ %10 = load i8, i8 addrspace(5)* %8
%11 = add i8 %9, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -305,13 +305,13 @@
define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i8]]
- %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
- store i8 0, i8* %gep0
- store i8 1, i8* %gep1
- %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i8, i8* %gep2
+ %alloca = alloca [2 x [2 x i8]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %gep0
+ store i8 1, i8 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i8, i8 addrspace(5)* %gep2
%sext = sext i8 %load to i32
store i32 %sext, i32 addrspace(1)* %out
ret void
@@ -319,26 +319,26 @@
define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i64]]
- %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
- store i64 0, i64* %gep0
- store i64 1, i64* %gep1
- %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i64, i64* %gep2
+ %alloca = alloca [2 x [2 x i64]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i64 0, i64 addrspace(5)* %gep0
+ store i64 1, i64 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i64, i64 addrspace(5)* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -347,40 +347,40 @@
define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x %struct.pair32]]
- %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x %struct.pair32]
- %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x %struct.pair32], addrspace(5)
+ %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
%cmp = icmp eq i32 %in, 0
- %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
- %load = load i32, i32* %sel
+ %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2
+ %load = load i32, i32 addrspace(5)* %sel
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -394,14 +394,14 @@
; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
- %alloca = alloca [16 x i32]
- %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
- store i32 5, i32* %tmp0
- %tmp1 = ptrtoint [16 x i32]* %alloca to i32
+ %alloca = alloca [16 x i32], addrspace(5)
+ %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
+ store i32 5, i32 addrspace(5)* %tmp0
+ %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32
%tmp2 = add i32 %tmp1, 5
- %tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
- %tmp5 = load i32, i32* %tmp4
+ %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)*
+ %tmp4 = getelementptr i32, i32 addrspace(5)* %tmp3, i32 %b
+ %tmp5 = load i32, i32 addrspace(5)* %tmp4
store i32 %tmp5, i32 addrspace(1)* %out
ret void
}
@@ -411,15 +411,15 @@
; OPT: load i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %{{[0-9]+}}, align 4
define amdgpu_kernel void @pointer_typed_alloca(i32 addrspace(1)* %A) {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4
- %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
- %ld2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %ld2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %ld2, i32 2
store i32 3, i32 addrspace(1)* %arrayidx2, align 4
ret void
@@ -462,9 +462,9 @@
; SI: buffer_load_dword
define amdgpu_kernel void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [2 x <16 x i32>]
- %tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>]* %alloca, i32 0, i32 %a
- %tmp5 = load <16 x i32>, <16 x i32>* %tmp0
+ %alloca = alloca [2 x <16 x i32>], addrspace(5)
+ %tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <16 x i32>, <16 x i32> addrspace(5)* %tmp0
store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
ret void
}
@@ -506,9 +506,9 @@
; SI: buffer_load_dword
define amdgpu_kernel void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [2 x <16 x float>]
- %tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>]* %alloca, i32 0, i32 %a
- %tmp5 = load <16 x float>, <16 x float>* %tmp0
+ %alloca = alloca [2 x <16 x float>], addrspace(5)
+ %tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <16 x float>, <16 x float> addrspace(5)* %tmp0
store <16 x float> %tmp5, <16 x float> addrspace(1)* %out
ret void
}
@@ -522,9 +522,9 @@
; SI: buffer_load_dword
define amdgpu_kernel void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [16 x <2 x float>]
- %tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>]* %alloca, i32 0, i32 %a
- %tmp5 = load <2 x float>, <2 x float>* %tmp0
+ %alloca = alloca [16 x <2 x float>], addrspace(5)
+ %tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <2 x float>, <2 x float> addrspace(5)* %tmp0
store <2 x float> %tmp5, <2 x float> addrspace(1)* %out
ret void
}
@@ -534,9 +534,9 @@
; OPT: load [0 x i32], [0 x i32] addrspace(3)*
define amdgpu_kernel void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [0 x i32]
- store [0 x i32] [], [0 x i32]* %tmp
- %load = load [0 x i32], [0 x i32]* %tmp
+ %tmp = alloca [0 x i32], addrspace(5)
+ store [0 x i32] [], [0 x i32] addrspace(5)* %tmp
+ %load = load [0 x i32], [0 x i32] addrspace(5)* %tmp
store [0 x i32] %load, [0 x i32] addrspace(1)* %out
ret void
}
@@ -546,9 +546,9 @@
; OPT: load [1 x i32], [1 x i32] addrspace(3)*
define amdgpu_kernel void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [1 x i32]
- store [1 x i32] [i32 0], [1 x i32]* %tmp
- %load = load [1 x i32], [1 x i32]* %tmp
+ %tmp = alloca [1 x i32], addrspace(5)
+ store [1 x i32] [i32 0], [1 x i32] addrspace(5)* %tmp
+ %load = load [1 x i32], [1 x i32] addrspace(5)* %tmp
store [1 x i32] %load, [1 x i32] addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index baa56de..3aaa589 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -17,14 +17,14 @@
; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
-define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32* %out) {
+define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) {
entry:
- %v = alloca [2 x i32]
- %vv = bitcast [2 x i32]* %v to <2 x i32>*
- store <2 x i32> %in, <2 x i32>* %vv
- %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx
- %x = load i32, i32* %e
- store i32 %x, i32* %out
+ %v = alloca [2 x i32], addrspace(5)
+ %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)*
+ store <2 x i32> %in, <2 x i32> addrspace(5)* %vv
+ %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx
+ %x = load i32, i32 addrspace(5)* %e
+ store i32 %x, i32 addrspace(5)* %out
ret void
}
@@ -41,14 +41,14 @@
; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
-define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32* %out) #0 {
+define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) #0 {
entry:
- %v = alloca [2 x i32]
- %vv = bitcast [2 x i32]* %v to <2 x i32>*
- store <2 x i32> %in, <2 x i32>* %vv
- %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx
- %x = load i32, i32* %e
- store i32 %x, i32* %out
+ %v = alloca [2 x i32], addrspace(5)
+ %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)*
+ store <2 x i32> %in, <2 x i32> addrspace(5)* %vv
+ %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx
+ %x = load i32, i32 addrspace(5)* %e
+ store i32 %x, i32 addrspace(5)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 3059a95..266df4d 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -176,57 +176,57 @@
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
- %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
+ %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
+; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
- store volatile i32 0, i32* %ftos
+; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
+ store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}
; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+ %ld = load volatile i32, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
+; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
+; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
%ld = load volatile i32, i32 addrspace(2)* %ftos
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index c1b8792..8e7f40a 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -20,12 +20,12 @@
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
-; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
+; getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
- %alloca = alloca [16 x i32], align 16
+ %alloca = alloca [16 x i32], align 16, addrspace(5)
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
%a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
@@ -33,11 +33,11 @@
%a = load i32, i32 addrspace(1)* %a_ptr, !range !0
%b = load i32, i32 addrspace(1)* %b_ptr, !range !0
%result = add i32 %a, %b
- %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
- store i32 %result, i32* %alloca_ptr, align 4
+ %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b
+ store i32 %result, i32 addrspace(5)* %alloca_ptr, align 4
; Dummy call
call void @llvm.amdgcn.s.barrier()
- %reload = load i32, i32* %alloca_ptr, align 4, !range !0
+ %reload = load i32, i32 addrspace(5)* %alloca_ptr, align 4, !range !0
%out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index 732142c..f164d81 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -14,16 +14,16 @@
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
-define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
+define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- %tmp = load volatile i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
%add = add nsw i32 %tmp, 1
- store volatile i32 %add, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- %tmp1 = load volatile i32, i32* %arrayidx2, align 4
+ store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
%add3 = add nsw i32 %tmp1, 2
- store volatile i32 %add3, i32* %arrayidx2, align 4
+ store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
store volatile i32 9, i32 addrspace(1)* null, align 4
ret void
}
@@ -54,17 +54,17 @@
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xb00{{$}}
; GCN: s_setpc_b64
-define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
+define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- %tmp = load volatile i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
%add = add nsw i32 %tmp, 1
- store volatile i32 %add, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- %tmp1 = load volatile i32, i32* %arrayidx2, align 4
+ store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
%add3 = add nsw i32 %tmp1, 2
call void @external_void_func_void()
- store volatile i32 %add3, i32* %arrayidx2, align 4
+ store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
store volatile i32 9, i32 addrspace(1)* null, align 4
ret void
}
@@ -114,19 +114,19 @@
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
@@ -167,45 +167,45 @@
; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
declare void @external_void_func_void() #0
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #3
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #3
+declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
+declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3
attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index e42546e..5ad8d92 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -38,8 +38,8 @@
; Structs
declare void @external_void_func_struct_i8_i32({ i8, i32 }) #0
-declare void @external_void_func_byval_struct_i8_i32({ i8, i32 }* byval) #0
-declare void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 }* sret, { i8, i32 }* byval) #0
+declare void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval) #0
+declare void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret, { i8, i32 } addrspace(5)* byval) #0
declare void @external_void_func_v16i8(<16 x i8>) #0
@@ -465,12 +465,12 @@
; GCN-NEXT: s_swappc_b64
; GCN-NOT: [[SP]]
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
- %val = alloca { i8, i32 }, align 4
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 1
- store i8 3, i8* %gep0
- store i32 8, i32* %gep1
- call void @external_void_func_byval_struct_i8_i32({ i8, i32 }* %val)
+ %val = alloca { i8, i32 }, align 4, addrspace(5)
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
+ store i8 3, i8 addrspace(5)* %gep0
+ store i32 8, i32 addrspace(5)* %gep1
+ call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
ret void
}
@@ -497,17 +497,17 @@
; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
- %in.val = alloca { i8, i32 }, align 4
- %out.val = alloca { i8, i32 }, align 4
- %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %in.val, i32 0, i32 0
- %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %in.val, i32 0, i32 1
- store i8 3, i8* %in.gep0
- store i32 8, i32* %in.gep1
- call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 }* %out.val, { i8, i32 }* %in.val)
- %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %out.val, i32 0, i32 0
- %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %out.val, i32 0, i32 1
- %out.val0 = load i8, i8* %out.gep0
- %out.val1 = load i32, i32* %out.gep1
+ %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
+ %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
+ store i8 3, i8 addrspace(5)* %in.gep0
+ store i32 8, i32 addrspace(5)* %in.gep1
+ call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
+ %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
+ %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
+ %out.val0 = load i8, i8 addrspace(5)* %out.gep0
+ %out.val1 = load i32, i32 addrspace(5)* %out.gep1
store volatile i8 %out.val0, i8 addrspace(1)* undef
store volatile i32 %out.val1, i32 addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 58e549e..f380bf5 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -132,24 +132,24 @@
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
- %alloca = alloca [512 x i32], align 4
- call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
ret void
}
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
- %alloca = alloca [100 x i32], align 4
- call void asm sideeffect "; use $0", "v"([100 x i32]* %alloca) #0
+ %alloca = alloca [100 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2124
define void @indirect_use_stack() #1 {
- %alloca = alloca [16 x i32], align 4
- call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0
call void @use_stack0()
ret void
}
@@ -201,8 +201,8 @@
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 2056
define void @direct_recursion_use_stack(i32 %val) #2 {
- %alloca = alloca [512 x i32], align 4
- call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %ret, label %call
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 88d1651..2c8abf5 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -28,8 +28,8 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
ret void
}
@@ -57,8 +57,8 @@
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_and_call() #0 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
call void @external_void_func_void()
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 343ba69..989d1b7 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -43,8 +43,8 @@
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
- %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %asc
+ %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
+ store volatile i32 0, i32* %asc
ret void
}
@@ -113,8 +113,8 @@
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%val = call i32 @llvm.amdgcn.workgroup.id.x()
call void asm sideeffect "; use $0", "s"(i32 %val)
ret void
@@ -432,8 +432,8 @@
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
@@ -512,8 +512,8 @@
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
@@ -568,10 +568,10 @@
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
- %alloca = alloca i32, align 4
+ %alloca = alloca i32, align 4, addrspace(5)
call void @use_workgroup_id_xyz()
- store volatile i32 0, i32* %alloca
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index f5e68d1..13cb8b5 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -368,7 +368,7 @@
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32* byval %arg32) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, i32 addrspace(1)* undef
@@ -407,7 +407,7 @@
store volatile i32 %arg29, i32 addrspace(1)* undef
store volatile i32 %arg30, i32 addrspace(1)* undef
store volatile i32 %arg31, i32 addrspace(1)* undef
- %private = load volatile i32, i32* %arg32
+ %private = load volatile i32, i32 addrspace(5)* %arg32
ret void
}
@@ -435,8 +435,8 @@
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 999, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 999, i32 addrspace(5)* %alloca
call void @too_many_args_use_workitem_id_x_byval(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -446,7 +446,7 @@
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32* %alloca)
+ i32 addrspace(5)* %alloca)
ret void
}
@@ -460,8 +460,8 @@
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 999, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 999, i32 addrspace(5)* %alloca
call void @too_many_args_use_workitem_id_x_byval(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -471,7 +471,7 @@
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32* %alloca)
+ i32 addrspace(5)* %alloca)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 99f6ec4..a091811 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -5,10 +5,10 @@
; GCN: buffer_store_dword [[FI]]
define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %b = alloca i8
- call void @llvm.lifetime.start.p0i8(i64 1, i8* %b)
- store volatile i8* %b, i8* addrspace(1)* undef
- call void @llvm.lifetime.end.p0i8(i64 1, i8* %b)
+ %b = alloca i8, addrspace(5)
+ call void @llvm.lifetime.start.p5i8(i64 1, i8 addrspace(5)* %b)
+ store volatile i8 addrspace(5)* %b, i8 addrspace(5)* addrspace(1)* undef
+ call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* %b)
ret void
}
@@ -18,10 +18,10 @@
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}}
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
-define amdgpu_kernel void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
- %tmp = alloca float
- store float 4.0, float *%tmp
- store float* %tmp, float* addrspace(3)* %ptr
+define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 4.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(3)* %ptr
ret void
}
@@ -38,13 +38,13 @@
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
-define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- store float 4.0, float* %tmp0
- store float 4.0, float* %tmp1
- store volatile float* %tmp0, float* addrspace(3)* %ptr
- store volatile float* %tmp1, float* addrspace(3)* %ptr
+define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float addrspace(5)* addrspace(3)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ store float 4.0, float addrspace(5)* %tmp0
+ store float 4.0, float addrspace(5)* %tmp1
+ store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(3)* %ptr
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(3)* %ptr
ret void
}
@@ -55,12 +55,12 @@
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @stored_fi_to_self() #0 {
- %tmp = alloca i32*
+ %tmp = alloca i32 addrspace(5)*, addrspace(5)
; Avoid optimizing everything out
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
- %bitcast = bitcast i32** %tmp to i32*
- store volatile i32* %bitcast, i32** %tmp
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp
ret void
}
@@ -74,17 +74,17 @@
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}}
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}}
define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
- %tmp0 = alloca [512 x i32]
- %tmp1 = alloca i32*
+ %tmp0 = alloca [512 x i32], addrspace(5)
+ %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
; Avoid optimizing everything out
- %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32*
- store volatile i32 32, i32* %tmp0.cast
+ %tmp0.cast = bitcast [512 x i32] addrspace(5)* %tmp0 to i32 addrspace(5)*
+ store volatile i32 32, i32 addrspace(5)* %tmp0.cast
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
- %bitcast = bitcast i32** %tmp1 to i32*
- store volatile i32* %bitcast, i32** %tmp1
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp1
ret void
}
@@ -99,18 +99,18 @@
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
define amdgpu_kernel void @stored_fi_to_fi() #0 {
- %tmp0 = alloca i32*
- %tmp1 = alloca i32*
- %tmp2 = alloca i32*
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp0
- store volatile i32* inttoptr (i32 5678 to i32*), i32** %tmp1
- store volatile i32* inttoptr (i32 9999 to i32*), i32** %tmp2
+ %tmp0 = alloca i32 addrspace(5)*, addrspace(5)
+ %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
+ %tmp2 = alloca i32 addrspace(5)*, addrspace(5)
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp0
+ store volatile i32 addrspace(5)* inttoptr (i32 5678 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
+ store volatile i32 addrspace(5)* inttoptr (i32 9999 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp2
- %bitcast1 = bitcast i32** %tmp1 to i32*
- %bitcast2 = bitcast i32** %tmp2 to i32* ; at offset 8
+ %bitcast1 = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
+ %bitcast2 = bitcast i32 addrspace(5)* addrspace(5)* %tmp2 to i32 addrspace(5)* ; at offset 8
- store volatile i32* %bitcast1, i32** %tmp2 ; store offset 4 at offset 8
- store volatile i32* %bitcast2, i32** %tmp1 ; store offset 8 at offset 4
+ store volatile i32 addrspace(5)* %bitcast1, i32 addrspace(5)* addrspace(5)* %tmp2 ; store offset 4 at offset 8
+ store volatile i32 addrspace(5)* %bitcast2, i32 addrspace(5)* addrspace(5)* %tmp1 ; store offset 8 at offset 4
ret void
}
@@ -118,10 +118,10 @@
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[FI]]
-define amdgpu_kernel void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
- %tmp = alloca float
- store float 0.0, float *%tmp
- store float* %tmp, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -136,15 +136,15 @@
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- %tmp2 = alloca float
- store volatile float 0.0, float *%tmp0
- store volatile float 0.0, float *%tmp1
- store volatile float 0.0, float *%tmp2
- store volatile float* %tmp1, float* addrspace(1)* %ptr
- store volatile float* %tmp2, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ %tmp2 = alloca float, addrspace(5)
+ store volatile float 0.0, float addrspace(5)*%tmp0
+ store volatile float 0.0, float addrspace(5)*%tmp1
+ store volatile float 0.0, float addrspace(5)*%tmp2
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr
+ store volatile float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -163,19 +163,19 @@
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca [4096 x i32]
- %tmp1 = alloca [4096 x i32]
- %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0
- store volatile i32 0, i32* %gep0.tmp0
- %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095
- store volatile i32 999, i32* %gep1.tmp0
- %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14
- store i32* %gep0.tmp1, i32* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca [4096 x i32], addrspace(5)
+ %tmp1 = alloca [4096 x i32], addrspace(5)
+ %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 0
+ store volatile i32 0, i32 addrspace(5)* %gep0.tmp0
+ %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 4095
+ store volatile i32 999, i32 addrspace(5)* %gep1.tmp0
+ %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 14
+ store i32 addrspace(5)* %gep0.tmp1, i32 addrspace(5)* addrspace(1)* %ptr
ret void
}
-@g1 = external addrspace(1) global i32*
+@g1 = external addrspace(1) global i32 addrspace(5)*
; This was leaving a dead node around resulting in failing to select
; on the leftover AssertZext's ValueType operand.
@@ -188,16 +188,16 @@
; GCN: buffer_store_dword [[FI]]
define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
entry:
- %b = alloca i32, align 4
- %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
- %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx
- %tmp2 = load i32, i32* %arrayidx, align 4
- store volatile i32* %b, i32* addrspace(1)* undef
+ %b = alloca i32, align 4, addrspace(5)
+ %tmp1 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* @g1, align 4
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %tmp1, i32 %idx
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx, align 4
+ store volatile i32 addrspace(5)* %b, i32 addrspace(5)* addrspace(1)* undef
ret void
}
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
+declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index c114332..714d433 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -6,32 +6,32 @@
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in
+; OPT-CIVI: getelementptr i32, i32* %in
; OPT-CIVI: br i1
; OPT-CIVI-NOT: ptrtoint
; OPT-GFX9: br
-; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28
-; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)*
-; OPT-GFX9: load i32, i32 addrspace(4)* %1
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28
+; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32*
+; OPT-GFX9: load i32, i32* %1
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
; GCN: {{^}}BB0_2:
-define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(4)* %in.gep
+ %tmp1 = load i32, i32* %in.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -39,7 +39,7 @@
}
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT: getelementptr i32, i32* %out,
; rOPT-CI-NOT: getelementptr
; OPT: br i1
@@ -50,11 +50,11 @@
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
-define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)*
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
+ %cast = addrspacecast i32* %in.gep to i32 addrspace(1)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
@@ -64,7 +64,7 @@
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -72,7 +72,7 @@
}
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT: getelementptr i32, i32* %out,
; OPT-CI-NOT: getelementptr
; OPT: br i1
@@ -83,11 +83,11 @@
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)*
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
+ %cast = addrspacecast i32* %in.gep to i32 addrspace(2)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
@@ -97,7 +97,7 @@
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -105,34 +105,34 @@
}
; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
-; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095
; OPT-CIVI: br
; OPT-CIVI-NOT: getelementptr
-; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep
+; OPT-CIVI: load i8, i8* %in.gep
; OPT-GFX9: br
-; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095
-; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095
+; OPT-GFX9: load i8, i8* %sunkaddr
; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+ %out.gep = getelementptr i32, i32* %out, i32 1024
+ %in.gep = getelementptr i8, i8* %in, i64 4095
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -140,29 +140,29 @@
}
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
-; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096
; OPT: br
; OPT-NOT: getelementptr
-; OPT: load i8, i8 addrspace(4)* %in.gep
+; OPT: load i8, i8* %in.gep
; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+ %out.gep = getelementptr i32, i32* %out, i64 99999
+ %in.gep = getelementptr i8, i8* %in, i64 4096
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -170,30 +170,30 @@
}
; OPT-LABEL: @test_no_sink_flat_reg_offset(
-; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg
; OPT: br
; OPT-NOT: getelementptr
-; OPT: load i8, i8 addrspace(4)* %in.gep
+; OPT: load i8, i8* %in.gep
; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 {
+define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+ %out.gep = getelementptr i32, i32* %out, i32 1024
+ %in.gep = getelementptr i8, i8* %in, i64 %reg
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index beb0723..2769f5f 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -7,7 +7,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
@@ -137,24 +137,24 @@
; GCN: {{^}}BB4_2:
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1022
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
@@ -178,24 +178,24 @@
define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1023
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
@@ -204,7 +204,7 @@
}
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
-; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint
@@ -215,24 +215,24 @@
; GCN: {{^BB[0-9]+}}_2:
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index f359ffc..2b36923 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -237,7 +237,7 @@
br i1 %tmp3, label %bb4, label %bb10
bb4: ; preds = %bb2
- %tmp6 = load float, float* undef
+ %tmp6 = load float, float addrspace(5)* undef
%tmp7 = fcmp olt float %tmp6, 0.0
br i1 %tmp7, label %bb8, label %Flow
@@ -257,7 +257,7 @@
br label %bb1
bb12: ; preds = %bb10
- store volatile <4 x float> %tmp11, <4 x float>* undef, align 16
+ store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index caba83c..e12a968 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -703,9 +703,9 @@
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
entry:
- %stack0 = alloca i32
- %ptr0 = load volatile i32*, i32* addrspace(1)* undef
- %eq = icmp eq i32* %ptr0, %stack0
+ %stack0 = alloca i32, addrspace(5)
+ %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef
+ %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0
%ext = zext i1 %eq to i32
store volatile i32 %ext, i32 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg.ll
index f35b070..d899ddd 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-to-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg.ll
@@ -8,20 +8,20 @@
; CHECK-LABEL: {{^}}copy_to_reg_frameindex:
define amdgpu_kernel void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
- %alloca = alloca [16 x i32]
+ %alloca = alloca [16 x i32], addrspace(5)
br label %loop
loop:
%inc = phi i32 [0, %entry], [%inc.i, %loop]
- %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc
- store i32 %inc, i32* %ptr
+ %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %inc
+ store i32 %inc, i32 addrspace(5)* %ptr
%inc.i = add i32 %inc, 1
%cnd = icmp uge i32 %inc.i, 16
br i1 %cnd, label %done, label %loop
done:
- %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
- %tmp1 = load i32, i32* %tmp0
+ %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0
+ %tmp1 = load i32, i32 addrspace(5)* %tmp0
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/extload-private.ll b/llvm/test/CodeGen/AMDGPU/extload-private.ll
index fd298b3..f119af2 100644
--- a/llvm/test/CodeGen/AMDGPU/extload-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/extload-private.ll
@@ -5,8 +5,8 @@
; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i8_sext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i8
- %tmp1 = load i8, i8* %tmp0
+ %tmp0 = alloca i8, addrspace(5)
+ %tmp1 = load i8, i8 addrspace(5)* %tmp0
%tmp2 = sext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -16,8 +16,8 @@
; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i8_zext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i8
- %tmp1 = load i8, i8* %tmp0
+ %tmp0 = alloca i8, addrspace(5)
+ %tmp1 = load i8, i8 addrspace(5)* %tmp0
%tmp2 = zext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -27,8 +27,8 @@
; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i16_sext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i16
- %tmp1 = load i16, i16* %tmp0
+ %tmp0 = alloca i16, addrspace(5)
+ %tmp1 = load i16, i16 addrspace(5)* %tmp0
%tmp2 = sext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -38,8 +38,8 @@
; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i16
- %tmp1 = load volatile i16, i16* %tmp0
+ %tmp0 = alloca i16, addrspace(5)
+ %tmp1 = load volatile i16, i16 addrspace(5)* %tmp0
%tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
index e486b9c..5c45528 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -19,42 +19,42 @@
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
+ store volatile i32 %x, i32* %fptr, align 4
ret void
}
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
- %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- store volatile i64 %x, i64 addrspace(4)* %fptr, align 8
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
+ store volatile i64 %x, i64* %fptr, align 8
ret void
}
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
- %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- store volatile <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
+ store volatile <4 x i32> %x, <4 x i32>* %fptr, align 16
ret void
}
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
%y = trunc i32 %x to i16
- store volatile i16 %y, i16 addrspace(4)* %fptr, align 2
+ store volatile i16 %y, i16* %fptr, align 2
ret void
}
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
%y = trunc i32 %x to i8
- store volatile i8 %y, i8 addrspace(4)* %fptr, align 2
+ store volatile i8 %y, i8* %fptr, align 2
ret void
}
@@ -63,8 +63,8 @@
; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- %fload = load volatile i32, i32 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
+ %fload = load volatile i32, i32* %fptr, align 4
store i32 %fload, i32 addrspace(1)* %out, align 4
ret void
}
@@ -72,8 +72,8 @@
; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- %fload = load volatile i64, i64 addrspace(4)* %fptr, align 8
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
+ %fload = load volatile i64, i64* %fptr, align 8
store i64 %fload, i64 addrspace(1)* %out, align 8
ret void
}
@@ -81,8 +81,8 @@
; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- %fload = load volatile <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
+ %fload = load volatile <4 x i32>, <4 x i32>* %fptr, align 32
store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -90,8 +90,8 @@
; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
+ %fload = load volatile i8, i8* %fptr, align 4
%ext = sext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -100,8 +100,8 @@
; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
+ %fload = load volatile i8, i8* %fptr, align 4
%ext = zext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -110,8 +110,8 @@
; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
+ %fload = load volatile i16, i16* %fptr, align 4
%ext = sext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -120,8 +120,8 @@
; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
+ %fload = load volatile i16, i16* %fptr, align 4
%ext = zext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -133,9 +133,9 @@
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define amdgpu_kernel void @flat_scratch_unaligned_load() {
- %scratch = alloca i32
- %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1
+ %scratch = alloca i32, addrspace(5)
+ %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
+ %ld = load volatile i32, i32* %fptr, align 1
ret void
}
@@ -145,9 +145,9 @@
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define amdgpu_kernel void @flat_scratch_unaligned_store() {
- %scratch = alloca i32
- %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %fptr, align 1
+ %scratch = alloca i32, addrspace(5)
+ %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
+ store volatile i32 0, i32* %fptr, align 1
ret void
}
@@ -156,9 +156,9 @@
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
- %scratch = alloca <2 x i32>
- %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
- %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
+ %scratch = alloca <2 x i32>, addrspace(5)
+ %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
+ %ld = load volatile <2 x i32>, <2 x i32>* %fptr
ret void
}
@@ -167,59 +167,59 @@
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
- %scratch = alloca <2 x i32>
- %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
- store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
+ %scratch = alloca <2 x i32>, addrspace(5)
+ %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
+ store volatile <2 x i32> zeroinitializer, <2 x i32>* %fptr
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_max_offset(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_max_offset(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
index dac1500..b2ac534 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
@@ -38,15 +38,15 @@
; NOHSA-NOADDR64: flat_store_dword
define amdgpu_kernel void @test_addr64(i32 addrspace(1)* %out) {
entry:
- %out.addr = alloca i32 addrspace(1)*, align 4
+ %out.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
- store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
- %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ store i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4
- %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
index e3d307e..1edb486 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -5,29 +5,29 @@
; GCN-LABEL: {{^}}atomic_add_i32_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
-define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 1023
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 1024
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
@@ -35,22 +35,22 @@
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
@@ -58,60 +58,60 @@
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_add_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_ret:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
ret void
}
@@ -119,22 +119,22 @@
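-; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
-; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}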
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
ret void
}
@@ -142,60 +142,60 @@
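-; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
-; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}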
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_and_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_ret:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
ret void
}
@@ -203,22 +203,22 @@
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
ret void
}
@@ -226,60 +226,60 @@
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
ret void
}
@@ -287,22 +287,22 @@
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
ret void
}
@@ -310,60 +310,60 @@
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_max_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_ret:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
ret void
}
@@ -371,22 +371,22 @@
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
ret void
}
@@ -394,60 +394,60 @@
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
ret void
}
@@ -455,22 +455,22 @@
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
ret void
}
@@ -478,60 +478,60 @@
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_min_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_ret:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
ret void
}
@@ -539,22 +539,22 @@
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
ret void
}
@@ -562,60 +562,60 @@
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
- define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+ define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
ret void
}
@@ -623,22 +623,22 @@
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
ret void
}
@@ -646,60 +646,60 @@
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_or_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_ret:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile or i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile or i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
ret void
}
@@ -707,22 +707,22 @@
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
ret void
}
@@ -730,50 +730,50 @@
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32:
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
@@ -782,10 +782,10 @@
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -793,23 +793,23 @@
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -817,63 +817,63 @@
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
entry:
- %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
- %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
ret void
}
@@ -881,22 +881,22 @@
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
ret void
}
@@ -904,50 +904,50 @@
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
@@ -955,21 +955,21 @@
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %in, i32 4
- %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %gep = getelementptr i32, i32* %in, i32 4
+ %val = load atomic i32, i32* %gep seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -977,60 +977,60 @@
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %ptr = getelementptr i32, i32* %in, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = load atomic i32, i32* %gep seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index
- %val = load atomic i32, i32 addrspace(4)* %ptr seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %ptr = getelementptr i32, i32* %in, i64 %index
+ %val = load atomic i32, i32* %ptr seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4
+ %gep = getelementptr i32, i32* %out, i32 4
+ store atomic i32 %in, i32* %gep seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
+ store atomic i32 %in, i32* %out seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ store atomic i32 %in, i32* %gep seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- store atomic i32 %in, i32 addrspace(4)* %ptr seq_cst, align 4
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ store atomic i32 %in, i32* %ptr seq_cst, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index acbfe6a..ca1364e 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -3,973 +3,973 @@
; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_add_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(4)* %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %in, i64 4
- %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %gep = getelementptr i64, i64* %in, i64 4
+ %val = load atomic i64, i64* %gep seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64(i64 addrspace(4)* %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
- %val = load atomic i64, i64 addrspace(4)* %in seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %val = load atomic i64, i64* %in seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %ptr = getelementptr i64, i64* %in, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = load atomic i64, i64* %gep seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index
- %val = load atomic i64, i64 addrspace(4)* %ptr seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %ptr = getelementptr i64, i64* %in, i64 %index
+ %val = load atomic i64, i64* %ptr seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8
+ %gep = getelementptr i64, i64* %out, i64 4
+ store atomic i64 %in, i64* %gep seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
-define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
- store atomic i64 %in, i64 addrspace(4)* %out seq_cst, align 8
+ store atomic i64 %in, i64* %out seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ store atomic i64 %in, i64* %gep seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- store atomic i64 %in, i64 addrspace(4)* %ptr seq_cst, align 8
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ store atomic i64 %in, i64* %ptr seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 9000
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 9000
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
- %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
- %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 9b75c44..8fdcfe5 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -18,8 +18,8 @@
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_mov_fi_i32() #0 {
- %alloca = alloca i32
- store volatile i32* %alloca, i32* addrspace(3)* undef
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -42,9 +42,9 @@
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_add_constant_to_fi_i32() #0 {
- %alloca = alloca [2 x i32], align 4
- %gep0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloca, i32 0, i32 1
- store volatile i32* %gep0, i32* addrspace(3)* undef
+ %alloca = alloca [2 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %alloca, i32 0, i32 1
+ store volatile i32 addrspace(5)* %gep0, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -64,8 +64,8 @@
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_i32() #0 {
- %alloca = alloca [2 x i32], align 4
- %ptrtoint = ptrtoint [2 x i32]* %alloca to i32
+ %alloca = alloca [2 x i32], align 4, addrspace(5)
+ %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
@@ -74,16 +74,16 @@
; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr:
; GCN: v_mov_b32_e32 v1, 15{{$}}
; GCN: buffer_store_dword v1, v0, s[0:3], s4 offen{{$}}
-define void @func_store_private_arg_i32_ptr(i32* %ptr) #0 {
- store volatile i32 15, i32* %ptr
+define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
+ store volatile i32 15, i32 addrspace(5)* %ptr
ret void
}
; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr:
; GCN: s_waitcnt
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s4 offen{{$}}
-define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 {
- %val = load volatile i32, i32* %ptr
+define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
+ %val = load volatile i32, i32 addrspace(5)* %ptr
ret void
}
@@ -102,11 +102,11 @@
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 {
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load1 = load i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load1 = load i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -115,11 +115,11 @@
; GCN-NEXT: s_mov_b32 s5, s32
; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5
; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
-define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 {
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load0 = load i8, i8* %gep0
- %load1 = load i32, i32* %gep1
+define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load0 = load i8, i8 addrspace(5)* %gep0
+ %load1 = load i32, i32 addrspace(5)* %gep1
store volatile i8 %load0, i8 addrspace(3)* undef
store volatile i32 %load1, i32 addrspace(3)* undef
ret void
@@ -146,15 +146,15 @@
; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}}
; GCN: ds_write_b32
-define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 {
+define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 {
%cmp = icmp eq i32 %arg2, 0
br i1 %cmp, label %bb, label %ret
bb:
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load1 = load volatile i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load1 = load volatile i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
br label %ret
ret:
@@ -175,12 +175,12 @@
; GCN: v_mul_lo_i32 v0, v0, 9
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
- %alloca0 = alloca [128 x i32], align 4
- %alloca1 = alloca [8 x i32], align 4
- %gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65
- %gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0
- store volatile i32 7, i32* %gep0
- %ptrtoint = ptrtoint i32* %gep1 to i32
+ %alloca0 = alloca [128 x i32], align 4, addrspace(5)
+ %alloca1 = alloca [8 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65
+ %gep1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca1, i32 0, i32 0
+ store volatile i32 7, i32 addrspace(5)* %gep0
+ %ptrtoint = ptrtoint i32 addrspace(5)* %gep1 to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
@@ -199,20 +199,20 @@
; GCN: v_mul_lo_i32 v0, v0, 9
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
- %alloca0 = alloca [128 x i32], align 4
- %alloca1 = alloca [8 x i32], align 4
+ %alloca0 = alloca [128 x i32], align 4, addrspace(5)
+ %alloca1 = alloca [8 x i32], align 4, addrspace(5)
%vcc = call i64 asm sideeffect "; def $0", "={VCC}"()
- %gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65
- %gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0
- store volatile i32 7, i32* %gep0
+ %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65
+ %gep1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca1, i32 0, i32 0
+ store volatile i32 7, i32 addrspace(5)* %gep0
call void asm sideeffect "; use $0", "{VCC}"(i64 %vcc)
- %ptrtoint = ptrtoint i32* %gep1 to i32
+ %ptrtoint = ptrtoint i32 addrspace(5)* %gep1 to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
}
-declare void @func(<4 x float>* nocapture) #0
+declare void @func(<4 x float> addrspace(5)* nocapture) #0
; undef flag not preserved in eliminateFrameIndex when handling the
; stores in the middle block.
@@ -225,16 +225,16 @@
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:
define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
bb:
- %tmp = alloca <4 x float>, align 16
+ %tmp = alloca <4 x float>, align 16, addrspace(5)
%tmp2 = insertelement <4 x float> undef, float %arg, i32 0
- store <4 x float> %tmp2, <4 x float>* undef
+ store <4 x float> %tmp2, <4 x float> addrspace(5)* undef
%tmp3 = icmp eq i32 %arg1, 0
br i1 %tmp3, label %bb4, label %bb5
bb4:
- call void @func(<4 x float>* nonnull undef)
- store <4 x float> %tmp2, <4 x float>* %tmp, align 16
- call void @func(<4 x float>* nonnull %tmp)
+ call void @func(<4 x float> addrspace(5)* nonnull undef)
+ store <4 x float> %tmp2, <4 x float> addrspace(5)* %tmp, align 16
+ call void @func(<4 x float> addrspace(5)* nonnull %tmp)
br label %bb5
bb5:
@@ -245,15 +245,15 @@
; GCN: s_and_saveexec_b64
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s5 offset:12
define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
- %alloca0 = alloca { i8, i32 }, align 4
+ %alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
br i1 %cmp, label %bb, label %ret
bb:
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 1
- %load1 = load volatile i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %alloca0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %alloca0, i32 0, i32 1
+ %load1 = load volatile i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
br label %ret
ret:
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index ca36732..323a6d3 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -506,8 +506,8 @@
; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
; GCN-DAG: buffer_store_dword v[[ELT1]]
; GCN-DAG: buffer_store_byte v[[ELT0]]
-define void @void_func_byval_struct_i8_i32({ i8, i32 }* byval %arg0) #0 {
- %arg0.load = load { i8, i32 }, { i8, i32 }* %arg0
+define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
ret void
}
@@ -520,9 +520,9 @@
; GCN: ds_write_b32 v0, v0
; GCN: s_setpc_b64
-define void @void_func_byval_struct_i8_i32_x2({ i8, i32 }* byval %arg0, { i8, i32 }* byval %arg1, i32 %arg2) #0 {
- %arg0.load = load volatile { i8, i32 }, { i8, i32 }* %arg0
- %arg1.load = load volatile { i8, i32 }, { i8, i32 }* %arg1
+define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 {
+ %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
+ %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef
store volatile i32 %arg2, i32 addrspace(3)* undef
@@ -535,9 +535,9 @@
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off
-define void @void_func_byval_i32_byval_i64(i32* byval %arg0, i64* byval %arg1) #0 {
- %arg0.load = load i32, i32* %arg0
- %arg1.load = load i64, i64* %arg1
+define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 {
+ %arg0.load = load i32, i32 addrspace(5)* %arg0
+ %arg1.load = load i64, i64 addrspace(5)* %arg1
store i32 %arg0.load, i32 addrspace(1)* undef
store i64 %arg1.load, i64 addrspace(1)* undef
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index 06fc5ea..967dc75 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -276,7 +276,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -443,7 +443,7 @@
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_struct(%struct.A* byval %a)
+define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -539,7 +539,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: g
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -547,7 +547,7 @@
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -555,7 +555,7 @@
; CHECK-NEXT: AddrSpaceQual: Constant
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: l
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -594,7 +594,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -603,7 +603,7 @@
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: IsVolatile: true
; CHECK-NEXT: - Name: b
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -613,7 +613,7 @@
; CHECK-NEXT: IsConst: true
; CHECK-NEXT: IsRestrict: true
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: Pipe
@@ -1043,7 +1043,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int **'
+; CHECK-NEXT: TypeName: 'int addrspace(5)* addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -1067,7 +1067,7 @@
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a)
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
!kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
@@ -1103,7 +1103,7 @@
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a)
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
!kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
@@ -1115,7 +1115,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'global int* __attribute__((ext_vector_type(2)))'
+; CHECK-NEXT: TypeName: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))'
; CHECK-NEXT: Size: 16
; CHECK-NEXT: Align: 16
; CHECK-NEXT: ValueKind: ByValue
@@ -1187,7 +1187,7 @@
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'long *'
+; CHECK-NEXT: TypeName: 'long addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -1195,7 +1195,7 @@
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: b
-; CHECK-NEXT: TypeName: 'char *'
+; CHECK-NEXT: TypeName: 'char addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1204,7 +1204,7 @@
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'char2 *'
+; CHECK-NEXT: TypeName: 'char2 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1213,7 +1213,7 @@
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: d
-; CHECK-NEXT: TypeName: 'char3 *'
+; CHECK-NEXT: TypeName: 'char3 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1222,7 +1222,7 @@
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: e
-; CHECK-NEXT: TypeName: 'char4 *'
+; CHECK-NEXT: TypeName: 'char4 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1231,7 +1231,7 @@
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: f
-; CHECK-NEXT: TypeName: 'char8 *'
+; CHECK-NEXT: TypeName: 'char8 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1240,7 +1240,7 @@
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: g
-; CHECK-NEXT: TypeName: 'char16 *'
+; CHECK-NEXT: TypeName: 'char16 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1309,7 +1309,7 @@
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @__test_block_invoke_kernel(
- <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0
+ <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
!kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
ret void
@@ -1380,7 +1380,7 @@
!13 = !{!"half8"}
!14 = !{!"float16"}
!15 = !{!"double16"}
-!16 = !{!"int *"}
+!16 = !{!"int addrspace(5)*"}
!17 = !{!"image2d_t"}
!18 = !{!"sampler_t"}
!19 = !{!"queue_t"}
@@ -1396,23 +1396,23 @@
!29 = !{i8 undef, i32 1}
!30 = !{i16 undef, i32 1}
!31 = !{i64 undef, i32 1}
-!32 = !{i32 *undef, i32 1}
+!32 = !{i32 addrspace(5)*undef, i32 1}
!50 = !{i32 1, i32 2, i32 3}
-!51 = !{!"int *", !"int *", !"int *"}
+!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"}
!60 = !{i32 1, i32 1, i32 1}
!61 = !{!"read_only", !"write_only", !"read_write"}
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
!70 = !{!"volatile", !"const restrict", !"pipe"}
-!80 = !{!"int **"}
+!80 = !{!"int addrspace(5)* addrspace(5)*"}
!81 = !{i32 1}
!82 = !{!"struct B"}
-!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
+!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"}
!84 = !{!"clk_event_t"}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
+!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"}
!94 = !{!"", !"", !"", !"", !"", !"", !""}
!100 = !{!"1:1:4:%d\5Cn"}
!101 = !{!"2:1:8:%g\5Cn"}
diff --git a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
index 7e2d0b0..8e54dcb 100644
--- a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
@@ -5,9 +5,9 @@
; GCN-NOT: [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
- %toint = ptrtoint i32* %alloca to i32
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
+ %toint = ptrtoint i32 addrspace(5)* %alloca to i32
%masked = and i32 %toint, 2147483647
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
@@ -19,9 +19,9 @@
; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
- %toint = ptrtoint i32* %alloca to i32
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
+ %toint = ptrtoint i32 addrspace(5)* %alloca to i32
%masked = and i32 %toint, 2147483647
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
index 7f08a89..b3a3396 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -22,11 +22,11 @@
; CI-PROMOTE: ds_read_b64
define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 {
%val = load double, double addrspace(1)* %in, align 8
- %array = alloca [8 x double], align 8
- %ptr = getelementptr inbounds [8 x double], [8 x double]* %array, i32 0, i32 %b
- store double %val, double* %ptr, align 8
+ %array = alloca [8 x double], align 8, addrspace(5)
+ %ptr = getelementptr inbounds [8 x double], [8 x double] addrspace(5)* %array, i32 0, i32 %b
+ store double %val, double addrspace(5)* %ptr, align 8
call void @llvm.amdgcn.s.barrier()
- %result = load double, double* %ptr, align 8
+ %result = load double, double addrspace(5)* %ptr, align 8
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -53,11 +53,11 @@
; CI-PROMOTE: ds_read2_b64
define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
- %array = alloca [4 x <2 x double>], align 16
- %ptr = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* %array, i32 0, i32 %b
- store <2 x double> %val, <2 x double>* %ptr, align 16
+ %array = alloca [4 x <2 x double>], align 16, addrspace(5)
+ %ptr = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>] addrspace(5)* %array, i32 0, i32 %b
+ store <2 x double> %val, <2 x double> addrspace(5)* %ptr, align 16
call void @llvm.amdgcn.s.barrier()
- %result = load <2 x double>, <2 x double>* %ptr, align 16
+ %result = load <2 x double>, <2 x double> addrspace(5)* %ptr, align 16
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
ret void
}
@@ -79,11 +79,11 @@
; CI-PROMOTE: ds_read_b64
define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 {
%val = load i64, i64 addrspace(1)* %in, align 8
- %array = alloca [8 x i64], align 8
- %ptr = getelementptr inbounds [8 x i64], [8 x i64]* %array, i32 0, i32 %b
- store i64 %val, i64* %ptr, align 8
+ %array = alloca [8 x i64], align 8, addrspace(5)
+ %ptr = getelementptr inbounds [8 x i64], [8 x i64] addrspace(5)* %array, i32 0, i32 %b
+ store i64 %val, i64 addrspace(5)* %ptr, align 8
call void @llvm.amdgcn.s.barrier()
- %result = load i64, i64* %ptr, align 8
+ %result = load i64, i64 addrspace(5)* %ptr, align 8
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
@@ -111,11 +111,11 @@
; CI-PROMOTE: ds_read2_b64
define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
- %array = alloca [4 x <2 x i64>], align 16
- %ptr = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %array, i32 0, i32 %b
- store <2 x i64> %val, <2 x i64>* %ptr, align 16
+ %array = alloca [4 x <2 x i64>], align 16, addrspace(5)
+ %ptr = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>] addrspace(5)* %array, i32 0, i32 %b
+ store <2 x i64> %val, <2 x i64> addrspace(5)* %ptr, align 16
call void @llvm.amdgcn.s.barrier()
- %result = load <2 x i64>, <2 x i64>* %ptr, align 16
+ %result = load <2 x i64>, <2 x i64> addrspace(5)* %ptr, align 16
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/insert_subreg.ll b/llvm/test/CodeGen/AMDGPU/insert_subreg.ll
index dcf6fe8..2dc9277 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_subreg.ll
@@ -8,8 +8,8 @@
; CHECK-LABEL: test:
define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
entry:
- %tmp0 = alloca [16 x i32]
- %tmp1 = ptrtoint [16 x i32]* %tmp0 to i32
+ %tmp0 = alloca [16 x i32], addrspace(5)
+ %tmp1 = ptrtoint [16 x i32] addrspace(5)* %tmp0 to i32
%tmp2 = sext i32 %tmp1 to i64
store i64 %tmp2, i64 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index d4f737e..e39fb65 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -17,9 +17,9 @@
define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll b/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
index 8e358ef..570498e 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
@@ -6,39 +6,39 @@
; CHECK-LABEL: {{^}}no_args:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @no_args() {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align32:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align32(<8 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align64:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align64(<16 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align128:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align128(<32 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align256:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align256(<64 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
index 4af37d8..d8cf523 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -46,14 +46,14 @@
; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
-; Scratch size = alloca size + emergency stack slot
+; Scratch size = alloca size + emergency stack slot
; ALL: ; ScratchSize: 32772
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
index 28b819a..e69ddbe 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -15,11 +15,11 @@
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
@@ -37,11 +37,11 @@
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index 80a08ac..2616d84 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -4,11 +4,11 @@
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -159,9 +159,9 @@
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
@@ -169,18 +169,18 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -188,9 +188,9 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -198,13 +198,13 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %out.gep = getelementptr i32, i32* %out, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out.gep
ret void
}
@@ -212,11 +212,11 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -224,9 +224,9 @@
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -235,10 +235,10 @@
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -246,8 +246,8 @@
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -256,9 +256,9 @@
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -267,13 +267,13 @@
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %out.gep = getelementptr i64, i64* %out, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out.gep
ret void
}
@@ -282,11 +282,11 @@
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
index 75ce7f5..689e2b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -4,11 +4,11 @@
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -261,9 +261,9 @@
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
@@ -271,18 +271,18 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32 addrspace(4)* %ptr) nounwind {
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -290,9 +290,9 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -300,13 +300,13 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %out.gep = getelementptr i32, i32* %out, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out.gep
ret void
}
@@ -314,11 +314,11 @@
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -341,9 +341,9 @@
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -352,10 +352,10 @@
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -363,8 +363,8 @@
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64 addrspace(4)* %ptr) nounwind {
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -373,9 +373,9 @@
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -384,13 +384,13 @@
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %out.gep = getelementptr i64, i64* %out, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out.gep
ret void
}
@@ -399,11 +399,11 @@
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
index dda91bc..d6dd6ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
@@ -8,8 +8,8 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return
define amdgpu_ps i32 @test_ps() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
@@ -21,8 +21,8 @@
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:4
; GCN: s_load_dword s0, s[0:1], 0x0
define amdgpu_cs i32 @test_cs() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
index 8039ec3..6ca009b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
@@ -221,9 +221,9 @@
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 {
entry:
- %load = load i16, i16 addrspace(4)* %in
+ %load = load i16, i16* %in
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -241,9 +241,9 @@
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
+define void @load_flat_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 {
entry:
- %load = load half, half addrspace(4)* %in
+ %load = load half, half* %in
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -261,9 +261,9 @@
; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 {
entry:
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -282,9 +282,9 @@
; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 {
entry:
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -301,10 +301,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg(i16* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -320,10 +320,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg(half* byval %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg(half addrspace(5)* byval %in, half %reg) #0 {
entry:
- %gep = getelementptr inbounds half, half* %in, i64 2045
- %load = load half, half* %gep
+ %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
+ %load = load half, half addrspace(5)* %gep
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -339,9 +339,9 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -357,9 +357,9 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg_nooff(half* %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, half %reg) #0 {
entry:
- %load = load volatile half, half* inttoptr (i32 4094 to half*)
+ %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -375,10 +375,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -395,10 +395,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -415,9 +415,9 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -434,9 +434,9 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -453,9 +453,9 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8* %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, half %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%bc.ext = bitcast i16 %ext to half
%build0 = insertelement <2 x half> undef, half %reg, i32 0
@@ -510,12 +510,12 @@
; GFX9-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4094
define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- %load = load i16, i16* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -527,12 +527,12 @@
; GFX9-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load i8, i8* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -545,12 +545,12 @@
; GFX9-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load i8, i8* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -606,11 +606,11 @@
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64
-define <2 x i16> @load_flat_v2i16_split(i16 addrspace(4)* %in) #0 {
+define <2 x i16> @load_flat_v2i16_split(i16* %in) #0 {
entry:
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 1
- %load0 = load volatile i16, i16 addrspace(4)* %in
- %load1 = load volatile i16, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %in, i64 1
+ %load0 = load volatile i16, i16* %in
+ %load1 = load volatile i16, i16* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1
@@ -644,11 +644,11 @@
; GFX9-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:6
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
-define <2 x i16> @load_private_v2i16_split(i16* byval %in) #0 {
+define <2 x i16> @load_private_v2i16_split(i16 addrspace(5)* byval %in) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i32 1
- %load0 = load volatile i16, i16* %in
- %load1 = load volatile i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i32 1
+ %load0 = load volatile i16, i16 addrspace(5)* %in
+ %load1 = load volatile i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 98a790d..226a55b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -269,10 +269,10 @@
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2i16_reghi_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i16, i16 addrspace(4)* %in
+ %load = load i16, i16* %in
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -288,10 +288,10 @@
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2f16_reghi_vreg(half addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load half, half addrspace(4)* %in
+ %load = load half, half* %in
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -307,10 +307,10 @@
; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -328,10 +328,10 @@
; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -347,11 +347,11 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg(i16* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -369,10 +369,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2i16_reghi_vreg(i16* byval %in, i16 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -388,11 +388,11 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg(half* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %gep = getelementptr inbounds half, half* %in, i64 2045
- %load = load half, half* %gep
+ %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
+ %load = load half, half addrspace(5)* %gep
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -407,10 +407,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff(i16* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -425,10 +425,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reghi_vreg_nooff(i16* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -443,10 +443,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg_nooff(half* %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load volatile half, half* inttoptr (i32 4094 to half*)
+ %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -461,11 +461,11 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -481,11 +481,11 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -501,10 +501,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -520,10 +520,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -539,10 +539,10 @@
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%bc.ext = bitcast i16 %ext to half
%build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
@@ -595,13 +595,13 @@
; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- %load = load volatile i16, i16* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ %load = load volatile i16, i16 addrspace(5)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -614,13 +614,13 @@
; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load volatile i8, i8* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load volatile i8, i8 addrspace(5)* %gep
%load.ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -634,13 +634,13 @@
; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load volatile i8, i8* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load volatile i8, i8 addrspace(5)* %gep
%load.ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-offset.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
index 9b9d844..928eeca 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
@@ -13,22 +13,22 @@
; CHECK: buffer_load_dword
define amdgpu_gs float @main(float %v1, float %v2, i32 %idx1, i32 %idx2) {
main_body:
- %m1 = alloca [513 x float]
- %m2 = alloca [513 x float]
+ %m1 = alloca [513 x float], addrspace(5)
+ %m2 = alloca [513 x float], addrspace(5)
- %gep1.store = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 %idx1
- store float %v1, float* %gep1.store
+ %gep1.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 %idx1
+ store float %v1, float addrspace(5)* %gep1.store
- %gep2.store = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 %idx2
- store float %v2, float* %gep2.store
+ %gep2.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 %idx2
+ store float %v2, float addrspace(5)* %gep2.store
; This used to use a base reg equal to 0.
- %gep1.load = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 0
- %out1 = load float, float* %gep1.load
+ %gep1.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 0
+ %out1 = load float, float addrspace(5)* %gep1.load
; This used to attempt to re-use the base reg at 0, generating an out-of-bounds instruction offset.
- %gep2.load = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 512
- %out2 = load float, float* %gep2.load
+ %gep2.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 512
+ %out2 = load float, float addrspace(5)* %gep2.load
%r = fadd float %out1, %out2
ret float %r
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
index 15fff2a..337dbd1 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
@@ -7,10 +7,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
ret void
}
@@ -20,10 +20,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
ret void
}
@@ -33,10 +33,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
ret void
}
@@ -46,10 +46,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
ret void
}
@@ -59,10 +59,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
ret void
}
@@ -72,10 +72,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
ret void
}
@@ -85,10 +85,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
ret void
}
@@ -98,10 +98,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
ret void
}
@@ -111,10 +111,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
ret void
}
@@ -124,10 +124,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -137,10 +137,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
ret void
}
@@ -150,10 +150,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
ret void
}
@@ -163,10 +163,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
ret void
}
@@ -176,10 +176,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
ret void
}
@@ -189,10 +189,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
ret void
}
@@ -202,10 +202,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
ret void
}
@@ -215,10 +215,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
ret void
}
@@ -228,10 +228,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
ret void
}
@@ -241,10 +241,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
ret void
}
@@ -254,10 +254,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
ret void
}
@@ -267,10 +267,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
ret void
}
@@ -280,10 +280,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
ret void
}
@@ -293,10 +293,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
ret void
}
@@ -306,10 +306,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
ret void
}
@@ -319,10 +319,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
ret void
}
@@ -332,10 +332,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
ret void
}
@@ -345,10 +345,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
ret void
}
@@ -358,10 +358,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
ret void
}
@@ -371,10 +371,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
ret void
}
@@ -384,10 +384,10 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -397,10 +397,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
ret void
}
@@ -410,10 +410,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
ret void
}
@@ -423,10 +423,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
ret void
}
@@ -436,10 +436,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
ret void
}
@@ -449,10 +449,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
ret void
}
@@ -462,10 +462,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
ret void
}
@@ -475,10 +475,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
ret void
}
@@ -488,10 +488,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
ret void
}
@@ -501,10 +501,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
ret void
}
@@ -514,10 +514,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
ret void
}
@@ -527,10 +527,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
ret void
}
@@ -540,10 +540,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
ret void
}
@@ -553,10 +553,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
ret void
}
@@ -566,10 +566,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
ret void
}
@@ -579,10 +579,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
ret void
}
@@ -592,10 +592,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
ret void
}
@@ -605,10 +605,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
ret void
}
@@ -618,10 +618,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
ret void
}
@@ -631,10 +631,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
ret void
}
@@ -644,9 +644,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
index 6f537c2..ae265f9 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
@@ -7,9 +7,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic
ret void
}
@@ -19,9 +19,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
ret void
}
@@ -31,9 +31,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in release
ret void
}
@@ -43,9 +43,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
ret void
}
@@ -55,9 +55,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
ret void
}
@@ -67,9 +67,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic
ret void
}
@@ -79,9 +79,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire
ret void
}
@@ -91,9 +91,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release
ret void
}
@@ -103,9 +103,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel
ret void
}
@@ -115,9 +115,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst
ret void
}
@@ -127,9 +127,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic
ret void
}
@@ -139,9 +139,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
ret void
}
@@ -151,9 +151,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release
ret void
}
@@ -163,9 +163,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
ret void
}
@@ -175,9 +175,9 @@
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -187,9 +187,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic
ret void
}
@@ -199,9 +199,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
ret void
}
@@ -211,9 +211,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release
ret void
}
@@ -223,9 +223,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
ret void
}
@@ -235,9 +235,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
ret void
}
@@ -247,9 +247,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic
ret void
}
@@ -259,9 +259,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire
ret void
}
@@ -271,9 +271,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release
ret void
}
@@ -283,9 +283,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel
ret void
}
@@ -295,8 +295,8 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
index b89bb0f..f3a0e48 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
@@ -8,36 +8,36 @@
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_load void (i32 addrspace(4)*, i32 addrspace(4)*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_load void (i32*, i32*): Unsupported synchronization scope
define amdgpu_kernel void @invalid_load(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("invalid") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("invalid") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32 addrspace(4)*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32*): Unsupported synchronization scope
define amdgpu_kernel void @invalid_store(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("invalid") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("invalid") seq_cst, align 4
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32 addrspace(4)*, i32, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported synchronization scope
define amdgpu_kernel void @invalid_cmpxchg(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32 addrspace(4)*, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32*, i32): Unsupported synchronization scope
define amdgpu_kernel void @invalid_rmw(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("invalid") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("invalid") seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll
index 57e705f..938b697 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll
@@ -12,10 +12,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -26,10 +26,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -40,10 +40,10 @@
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -54,10 +54,10 @@
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -68,10 +68,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -82,10 +82,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -96,10 +96,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -110,10 +110,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -124,10 +124,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -138,10 +138,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -152,10 +152,10 @@
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -166,10 +166,10 @@
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -180,10 +180,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -194,10 +194,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -208,10 +208,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -222,10 +222,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -236,10 +236,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -250,10 +250,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -264,10 +264,10 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -278,42 +278,42 @@
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
- i32* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(5)* %in, i32* %out) {
entry:
- %val = load i32, i32* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
- i32* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(5)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
- %val = load i32, i32* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid
+ %val = load i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_global_0
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
define amdgpu_kernel void @nontemporal_global_0(
- i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(1)* %in, i32* %out) {
entry:
%val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
@@ -321,56 +321,56 @@
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
- i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(1)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
- i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(3)* %in, i32* %out) {
entry:
%val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
- i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(3)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid
%val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val = load i32, i32* %in, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid
- %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
+ %val = load i32, i32* %val.gep, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
index e2bfae6..ccf11ad 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load atomic i32, i32* %if_ptr syncscope("workgroup") seq_cst, align 4
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load atomic i32, i32 addrspace(5)* %if_ptr syncscope("workgroup") seq_cst, align 4
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load atomic i32, i32* %else_ptr syncscope("agent") unordered, align 4
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load atomic i32, i32 addrspace(5)* %else_ptr syncscope("agent") unordered, align 4
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
index 04fb9cb..f7849d3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load i32, i32* %if_ptr, align 4, !nontemporal !1
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load i32, i32 addrspace(5)* %if_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load i32, i32* %else_ptr, align 4, !nontemporal !1
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load i32, i32 addrspace(5)* %else_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
index b13ea87..d0ee5b5 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load i32, i32* %if_ptr, align 4, !nontemporal !1
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load i32, i32 addrspace(5)* %if_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load i32, i32* %else_ptr, align 4
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load i32, i32 addrspace(5)* %else_ptr, align 4
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll
index 50df0bc..296ef16 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll
@@ -9,9 +9,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4
+ store atomic i32 %in, i32* %out unordered, align 4
ret void
}
@@ -19,9 +19,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4
+ store atomic i32 %in, i32* %out monotonic, align 4
ret void
}
@@ -29,9 +29,9 @@
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out release, align 4
+ store atomic i32 %in, i32* %out release, align 4
ret void
}
@@ -39,9 +39,9 @@
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
+ store atomic i32 %in, i32* %out seq_cst, align 4
ret void
}
@@ -49,9 +49,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
ret void
}
@@ -59,9 +59,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
ret void
}
@@ -69,9 +69,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
ret void
}
@@ -79,9 +79,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
ret void
}
@@ -89,9 +89,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
ret void
}
@@ -99,9 +99,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
ret void
}
@@ -109,9 +109,9 @@
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") release, align 4
ret void
}
@@ -119,9 +119,9 @@
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
ret void
}
@@ -129,9 +129,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
ret void
}
@@ -139,9 +139,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
ret void
}
@@ -149,9 +149,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
ret void
}
@@ -159,9 +159,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
ret void
}
@@ -169,9 +169,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
ret void
}
@@ -179,9 +179,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
ret void
}
@@ -189,9 +189,9 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
ret void
}
@@ -199,31 +199,31 @@
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
- i32 addrspace(4)* %in, i32* %out) {
+ i32* %in, i32 addrspace(5)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
- store i32 %val, i32* %out, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ store i32 %val, i32 addrspace(5)* %out, !nontemporal !0
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
- i32 addrspace(4)* %in, i32* %out) {
+ i32* %in, i32 addrspace(5)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
- %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
- store i32 %val, i32* %out.gep, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid
+ store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0
ret void
}
@@ -231,9 +231,9 @@
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_0(
- i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+ i32* %in, i32 addrspace(1)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
ret void
}
@@ -242,10 +242,10 @@
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
- i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+ i32* %in, i32 addrspace(1)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
ret void
@@ -254,9 +254,9 @@
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
- i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+ i32* %in, i32 addrspace(3)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
ret void
}
@@ -264,10 +264,10 @@
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
- i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+ i32* %in, i32 addrspace(3)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
%out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
ret void
@@ -276,22 +276,22 @@
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
- store i32 %val, i32 addrspace(4)* %out, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ store i32 %val, i32* %out, !nontemporal !0
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
- %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid
- store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
+ store i32 %val, i32* %out.gep, !nontemporal !0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
index 539eed9..f44dcc0 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
@@ -13,7 +13,7 @@
; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @in_worklist_once() #0 {
bb:
- %tmp = load i64, i64* undef
+ %tmp = load i64, i64 addrspace(5)* undef
br label %bb1
bb1: ; preds = %bb1, %bb
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
index 23bd2e4..5a0d87f 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
@@ -7,49 +7,49 @@
; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
- store volatile i8 5, i8* inttoptr (i32 8 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
- store volatile i16 5, i16* inttoptr (i32 8 to i16*)
+ store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
- store volatile i32 5, i32* inttoptr (i32 8 to i32*)
+ store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
- store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ store volatile <2 x i32> <i32 5, i32 10>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
- store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
%sextload = sext i8 %load to i32
store i32 %sextload, i32 addrspace(1)* undef
ret void
@@ -58,7 +58,7 @@
; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
%zextload = zext i8 %load to i32
store i32 %zextload, i32 addrspace(1)* undef
ret void
@@ -67,14 +67,14 @@
; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
%sextload = sext i16 %load to i32
store i32 %sextload, i32 addrspace(1)* undef
ret void
@@ -83,7 +83,7 @@
; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
%zextload = zext i16 %load to i32
store i32 %zextload, i32 addrspace(1)* undef
ret void
@@ -92,28 +92,28 @@
; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
- %load = load volatile i32, i32* inttoptr (i32 8 to i32*)
+ %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
- %load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
- %load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
- store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*)
ret void
}
@@ -121,7 +121,7 @@
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
- store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*)
ret void
}
@@ -129,7 +129,7 @@
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
- store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*)
ret void
}
@@ -144,11 +144,11 @@
; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4,
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32
; Kernel that stores into a 16-element i32 stack array at an index that is
; not known at compile time: the index is a value loaded (volatile) from an
; undef global pointer, plus the constant 8. The store itself is volatile
; and writes the constant 9.
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
- %alloca = alloca [16 x i32], align 4
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
%vaddr = load volatile i32, i32 addrspace(1)* undef
%vaddr.off = add i32 %vaddr, 8
- %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %vaddr.off
- store volatile i32 9, i32* %gep
+ %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off
+ store volatile i32 9, i32 addrspace(5)* %gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 4cefa7e..c110534 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -39,11 +39,11 @@
; GCN: s_sub_u32 s32, s32, 0x1200{{$}}
; GCN: s_setpc_b64
; Non-kernel function that uses its own stack object and then makes a call:
; it allocates a [16 x i32] array, does two volatile stores of 0 through
; GEPs into that array, and calls @external_void_func_i32 with the
; immediate 42.
; NOTE(review): %gep15 is formed with index 16, which is one past the last
; element of [16 x i32] even though the GEP is marked inbounds and the name
; suggests element 15 -- confirm whether this is intentional.
define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
- %alloca = alloca [16 x i32], align 4
- %gep0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
- %gep15 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 16
- store volatile i32 0, i32* %gep0
- store volatile i32 0, i32* %gep15
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0
+ %gep15 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 16
+ store volatile i32 0, i32 addrspace(5)* %gep0
+ store volatile i32 0, i32 addrspace(5)* %gep15
call void @external_void_func_i32(i32 42)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
index 87f3714..4685e54 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
@@ -13,44 +13,44 @@
define amdgpu_kernel void @_Z9chk1D_512v() #0 {
entry:
- %a0 = alloca i32, align 4
- %b0 = alloca i32, align 4
- %c0 = alloca i32, align 4
- %d0 = alloca i32, align 4
- %a1 = alloca i32, align 4
- %b1 = alloca i32, align 4
- %c1 = alloca i32, align 4
- %d1 = alloca i32, align 4
- %data = alloca i32, align 4
- %0 = load i32, i32* %a0, align 4
- %1 = load i32, i32* %b0, align 4
+ %a0 = alloca i32, align 4, addrspace(5)
+ %b0 = alloca i32, align 4, addrspace(5)
+ %c0 = alloca i32, align 4, addrspace(5)
+ %d0 = alloca i32, align 4, addrspace(5)
+ %a1 = alloca i32, align 4, addrspace(5)
+ %b1 = alloca i32, align 4, addrspace(5)
+ %c1 = alloca i32, align 4, addrspace(5)
+ %d1 = alloca i32, align 4, addrspace(5)
+ %data = alloca i32, align 4, addrspace(5)
+ %0 = load i32, i32 addrspace(5)* %a0, align 4
+ %1 = load i32, i32 addrspace(5)* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %2 = load i32, i32* %c0, align 4
- %3 = load i32, i32* %d0, align 4
+ %2 = load i32, i32 addrspace(5)* %c0, align 4
+ %3 = load i32, i32 addrspace(5)* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %land.lhs.true
- store i32 1, i32* %data, align 4
+ store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32, i32* %a1, align 4
- %5 = load i32, i32* %b1, align 4
+ %4 = load i32, i32 addrspace(5)* %a1, align 4
+ %5 = load i32, i32 addrspace(5)* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.end6
land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32* %c1, align 4
- %7 = load i32, i32* %d1, align 4
+ %6 = load i32, i32 addrspace(5)* %c1, align 4
+ %7 = load i32, i32 addrspace(5)* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.end6
if.then5: ; preds = %land.lhs.true3
- store i32 1, i32* %data, align 4
+ store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end6
if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end
diff --git a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
index cf0c794..8e96fbb 100644
--- a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
@@ -19,7 +19,7 @@
; OPTNONE-NOT: s_mov_b32
; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s5 offen{{$}}
; Kernel with no stack objects of its own that performs a volatile store
; of i32 0 through an undef pointer.
define amdgpu_kernel void @store_to_undef() #0 {
- store volatile i32 0, i32* undef
+ store volatile i32 0, i32 addrspace(5)* undef
ret void
}
@@ -29,7 +29,7 @@
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
; Kernel with no stack objects of its own that performs a volatile store
; of i32 0 to the constant address 124 (a pointer built with inttoptr).
define amdgpu_kernel void @store_to_inttoptr() #0 {
- store volatile i32 0, i32* inttoptr (i32 124 to i32*)
+ store volatile i32 0, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
}
@@ -39,7 +39,7 @@
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
; Kernel with no stack objects of its own that performs a volatile i32
; load through an undef pointer. The loaded value is never used.
define amdgpu_kernel void @load_from_undef() #0 {
- %ld = load volatile i32, i32* undef
+ %ld = load volatile i32, i32 addrspace(5)* undef
ret void
}
@@ -49,7 +49,7 @@
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
; Kernel with no stack objects of its own that performs a volatile i32
; load from the constant address 124 (a pointer built with inttoptr).
; The loaded value is never used.
define amdgpu_kernel void @load_from_inttoptr() #0 {
- %ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
+ %ld = load volatile i32, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll
index f805430..205a4db 100644
--- a/llvm/test/CodeGen/AMDGPU/private-element-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-element-size.ll
@@ -43,13 +43,13 @@
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <4 x i32>], align 16
- %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 1
- store <4 x i32> zeroinitializer, <4 x i32>* %gep0
- store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %gep1
- %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 %index
- %load = load <4 x i32>, <4 x i32>* %gep2
+ %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 1
+ store <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %gep0
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <4 x i32>, <4 x i32> addrspace(5)* %gep2
store <4 x i32> %load, <4 x i32> addrspace(1)* %out
ret void
}
@@ -113,13 +113,13 @@
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <8 x i32>], align 16
- %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 1
- store <8 x i32> zeroinitializer, <8 x i32>* %gep0
- store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %gep1
- %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 %index
- %load = load <8 x i32>, <8 x i32>* %gep2
+ %alloca = alloca [2 x <8 x i32>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 1
+ store <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %gep0
+ store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <8 x i32>, <8 x i32> addrspace(5)* %gep2
store <8 x i32> %load, <8 x i32> addrspace(1)* %out
ret void
}
@@ -150,13 +150,13 @@
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x i64], align 16
- %gep0 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 1
- store i64 0, i64* %gep0
- store i64 34359738602, i64* %gep1
- %gep2 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 %index
- %load = load i64, i64* %gep2
+ %alloca = alloca [2 x i64], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 1
+ store i64 0, i64 addrspace(5)* %gep0
+ store i64 34359738602, i64 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load i64, i64 addrspace(5)* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -186,13 +186,13 @@
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x double], align 16
- %gep0 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 1
- store double 0.0, double* %gep0
- store double 4.0, double* %gep1
- %gep2 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 %index
- %load = load double, double* %gep2
+ %alloca = alloca [2 x double], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 1
+ store double 0.0, double addrspace(5)* %gep0
+ store double 4.0, double addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load double, double addrspace(5)* %gep2
store double %load, double addrspace(1)* %out
ret void
}
@@ -235,13 +235,13 @@
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <2 x i64>], align 16
- %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 1
- store <2 x i64> zeroinitializer, <2 x i64>* %gep0
- store <2 x i64> <i64 1, i64 2>, <2 x i64>* %gep1
- %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 %index
- %load = load <2 x i64>, <2 x i64>* %gep2
+ %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 1
+ store <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %gep0
+ store <2 x i64> <i64 1, i64 2>, <2 x i64> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <2 x i64>, <2 x i64> addrspace(5)* %gep2
store <2 x i64> %load, <2 x i64> addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index dfec830..bf3b50e 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -6,26 +6,26 @@
; Private atomics have no real use, but the compiler at least shouldn't crash on them.
; Kernel that applies an atomicrmw to a stack object: it allocates a
; [2 x i32] array, initializes the elements to 0 and 1, selects an element
; with the runtime index %in, performs `atomicrmw add ..., 7 acq_rel` on
; it, and writes the value returned by the atomicrmw to %out.
define amdgpu_kernel void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = atomicrmw add i32 addrspace(5)* %tmp3, i32 7 acq_rel
store i32 %tmp4, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = cmpxchg i32 addrspace(5)* %tmp3, i32 0, i32 1 acq_rel monotonic
%val = extractvalue { i32, i1 } %tmp4, 0
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
index b440efc..19e89ce 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
@@ -7,20 +7,20 @@
; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
; function is a constantexpr cast of a function.
-declare void @foo(float*) #0
+declare void @foo(float addrspace(5)*) #0
declare void @foo.varargs(...) #0
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo
; Kernel that passes an i32 alloca to @foo through a constant-expression
; bitcast of the callee: @foo is declared as taking a float pointer and is
; cast to a function type taking an i32 pointer, so the callee operand of
; the call is a constantexpr rather than a plain function.
define amdgpu_kernel void @crash_call_constexpr_cast() #0 {
- %alloca = alloca i32
- call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
+ %alloca = alloca i32, addrspace(5)
+ call void bitcast (void (float addrspace(5)*)* @foo to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0
ret void
}
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs
; Same shape as @crash_call_constexpr_cast, but the callee is the varargs
; function @foo.varargs, bitcast in a constant expression to a function
; type taking a single i32 pointer; the i32 alloca is passed as that
; argument.
define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 {
- %alloca = alloca i32
- call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
+ %alloca = alloca i32, addrspace(5)
+ call void bitcast (void (...)* @foo.varargs to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index 515cf81..51aacf5 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=ASM %s
; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
@@ -8,19 +8,19 @@
; ASM: ; ScratchSize: 24
define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx4, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx4, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx5
+ %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx5
%arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx6
ret void
@@ -35,13 +35,13 @@
; ASM: ; ScratchSize: 0
define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = load i32, i32* %tmp3
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = load i32, i32 addrspace(5)* %tmp3
%tmp5 = load volatile i32, i32 addrspace(1)* undef
%tmp6 = add i32 %tmp4, %tmp5
store i32 %tmp6, i32 addrspace(1)* %out
@@ -56,25 +56,25 @@
; ASM: ; ScratchSize: 24
; Non-kernel function (no amdgpu_kernel/shader calling convention on the
; define) working on a 5-element i32 stack array:
;   - stores 4 and 5 into %stack at two indices loaded from %in[0] and %in[1];
;   - reads %stack[0] and %stack[1] back and writes them to %out[0] and %out[1].
; The two store indices are runtime values, so the accesses to %stack are
; dynamically indexed.
define void @no_promote_to_lds_c(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
}
-declare i32 @foo(i32*) #0
+declare i32 @foo(i32 addrspace(5)*) #0
; ASM-LABEL: {{^}}call_private:
; ASM: buffer_store_dword
@@ -83,13 +83,13 @@
; ASM: ScratchSize: 16396
; Kernel that hands a pointer into its own stack object to another
; function: it allocates a [2 x i32] array, initializes the elements to 0
; and 1, forms a pointer to the element selected by the runtime index %in,
; passes that pointer to @foo, and stores @foo's i32 result to %out.
define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %val = call i32 @foo(i32* %tmp3)
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %val = call i32 @foo(i32 addrspace(5)* %tmp3)
store i32 %val, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-globals.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-globals.ll
index 38db51d..4403d1f 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-globals.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-globals.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
@@ -12,19 +12,19 @@
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
- %stack = alloca [10 x i32], align 4
+ %stack = alloca [10 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
%v0 = getelementptr inbounds [750 x [10 x i32]], [750 x [10 x i32]] addrspace(3)* @global_array0, i32 0, i32 0, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
index 653e259..6a41c3a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
@@ -7,13 +7,13 @@
; OPTS: ds_write
; Kernel that uses a nested [2 x [2 x i32]] stack array. All three GEPs
; select the first inner array (second index 0): elements 0 and 1 of it
; are initialized to 0 and 1, then the element at the runtime %index is
; loaded and written to %out.
define amdgpu_kernel void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -23,13 +23,13 @@
; ALL-NOT: ds_write
; Same body as @promote_alloca_i32_array_array (nested [2 x [2 x i32]]
; stack array, two constant-index stores, one load at the runtime %index
; written to %out), but defined with attribute group #1 instead of #0.
; NOTE(review): #1 is presumably the optnone attribute set, judging by the
; function name -- the attribute definition is not visible in this chunk.
define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
index 137b41d..445bfa5 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
@@ -32,19 +32,19 @@
; GCN: workgroup_group_segment_byte_size = 2340
define amdgpu_kernel void @promote_alloca_size_order_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
@@ -64,19 +64,19 @@
; GCN: workgroup_group_segment_byte_size = 2352
define amdgpu_kernel void @promote_alloca_size_order_1(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
@@ -102,19 +102,19 @@
; GCN: workgroup_group_segment_byte_size = 1060
define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
index 03ce116..b57323a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
@@ -5,22 +5,22 @@
; GCN-LABEL: {{^}}stored_lds_pointer_value:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
- %tmp = alloca float
- store float 0.0, float *%tmp
- store float* %tmp, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_lds_pointer_value(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr
ret void
}
; GCN-LABEL: {{^}}stored_lds_pointer_value_offset:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_offset(float* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- store float 0.0, float *%tmp0
- store float 0.0, float *%tmp1
- store volatile float* %tmp0, float* addrspace(1)* %ptr
- store volatile float* %tmp1, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_lds_pointer_value_offset(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp0
+ store float 0.0, float addrspace(5)*%tmp1
+ store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(1)* %ptr
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -29,12 +29,12 @@
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_gep(float addrspace(5)* addrspace(1)* %ptr, i32 %idx) #0 {
bb:
- %tmp = alloca float, i32 16
- store float 0.0, float* %tmp
- %tmp2 = getelementptr inbounds float, float* %tmp, i32 %idx
- store float* %tmp2, float* addrspace(1)* %ptr
+ %tmp = alloca float, i32 16, addrspace(5)
+ store float 0.0, float addrspace(5)* %tmp
+ %tmp2 = getelementptr inbounds float, float addrspace(5)* %tmp, i32 %idx
+ store float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -46,29 +46,29 @@
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
-define amdgpu_kernel void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
+define amdgpu_kernel void @stored_vector_pointer_value(i32 addrspace(5)* addrspace(1)* %out, i32 %index) {
entry:
- %tmp0 = alloca [4 x i32]
- %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0
- %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 1
- %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 2
- %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 3
- store i32 0, i32* %x
- store i32 1, i32* %y
- store i32 2, i32* %z
- store i32 3, i32* %w
- %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 %index
- store i32* %tmp1, i32* addrspace(1)* %out
+ %tmp0 = alloca [4 x i32], addrspace(5)
+ %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 1
+ %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 2
+ %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 1, i32 addrspace(5)* %y
+ store i32 2, i32 addrspace(5)* %z
+ store i32 3, i32 addrspace(5)* %w
+ %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 %index
+ store i32 addrspace(5)* %tmp1, i32 addrspace(5)* addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}stored_fi_to_self:
; GCN-NOT: ds_
define amdgpu_kernel void @stored_fi_to_self() #0 {
- %tmp = alloca i32*
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
- %bitcast = bitcast i32** %tmp to i32*
- store volatile i32* %bitcast, i32** %tmp
+ %tmp = alloca i32 addrspace(5)*, addrspace(5)
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
index 2604ed4..6caa617 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
@@ -6,10 +6,10 @@
%struct.foo = type {i32, i32, i32}
-define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo* %in, i32 %offset) {
+define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo addrspace(5)* %in, i32 %offset) {
entry:
- %ptr = getelementptr inbounds %struct.foo, %struct.foo* %in, i32 1, i32 2
- %x = load i32, i32 *%ptr, align 4
+ %ptr = getelementptr inbounds %struct.foo, %struct.foo addrspace(5)* %in, i32 1, i32 2
+ %x = load i32, i32 addrspace(5)*%ptr, align 4
br label %loop
loop:
%i = phi i32 [ 100, %entry ], [ %nexti, %loop ]
diff --git a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
index 53ee214..99d55fe 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
@@ -12,13 +12,13 @@
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
- %0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
- store i32 0, i32* %1
- store i32 1, i32* %2
- %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32, i32* %3
+ %0 = alloca [2 x i32], addrspace(5)
+ %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %1
+ store i32 1, i32 addrspace(5)* %2
+ %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
+ %4 = load i32, i32 addrspace(5)* %3
%5 = call i32 @llvm.r600.read.tidig.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll
index 5d981a2..b6d6006 100644
--- a/llvm/test/CodeGen/AMDGPU/sad.ll
+++ b/llvm/test/CodeGen/AMDGPU/sad.ll
@@ -59,7 +59,7 @@
%t1 = select i1 %icmp1, i32 %a, i32 %b
%ret0 = sub i32 %t0, %t1
- store volatile i32 %ret0, i32 *undef
+ store volatile i32 %ret0, i32 addrspace(5)*undef
%ret = add i32 %ret0, %c
store i32 %ret, i32 addrspace(1)* %out
@@ -77,7 +77,7 @@
%ret0 = sub i32 %t0, %t1
%ret = add i32 %ret0, %c
- store volatile i32 %ret, i32 *undef
+ store volatile i32 %ret, i32 addrspace(5)*undef
store i32 %ret, i32 addrspace(1)* %out
ret void
}
@@ -87,7 +87,7 @@
define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
- store volatile i32 %t0, i32 *undef
+ store volatile i32 %t0, i32 addrspace(5)*undef
%icmp1 = icmp ule i32 %a, %b
%t1 = select i1 %icmp1, i32 %a, i32 %b
@@ -108,7 +108,7 @@
%icmp1 = icmp ule i32 %a, %b
%t1 = select i1 %icmp1, i32 %a, i32 %b
- store volatile i32 %t1, i32 *undef
+ store volatile i32 %t1, i32 addrspace(5)*undef
%ret0 = sub i32 %t0, %t1
%ret = add i32 %ret0, %c
@@ -122,7 +122,7 @@
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %b
- store volatile i32 %sub0, i32 *undef
+ store volatile i32 %sub0, i32 addrspace(5)*undef
%sub1 = sub i32 %b, %a
%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
@@ -141,7 +141,7 @@
%sub0 = sub i32 %a, %b
%sub1 = sub i32 %b, %a
%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
- store volatile i32 %ret0, i32 *undef
+ store volatile i32 %ret0, i32 addrspace(5)*undef
%ret = add i32 %ret0, %c
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
index ade5a96..3631d67 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -15,26 +15,26 @@
define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0
- store i32 1, i32* %scratchptr0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 0
+ store i32 1, i32 addrspace(5)* %scratchptr0
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0
- store i32 2, i32* %scratchptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 0
+ store i32 2, i32 addrspace(5)* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32 addrspace(5)* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32 addrspace(5)* %else_ptr
br label %done
done:
@@ -55,29 +55,29 @@
define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
%offset0 = load i32, i32 addrspace(1)* %offsets
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
- store i32 %offset0, i32* %scratchptr0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %offset0
+ store i32 %offset0, i32 addrspace(5)* %scratchptr0
%offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
%offset1 = load i32, i32 addrspace(1)* %offsetptr1
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
- store i32 %offset1, i32* %scratchptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %offset1
+ store i32 %offset1, i32 addrspace(5)* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32 addrspace(5)* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32 addrspace(5)* %else_ptr
br label %done
done:
@@ -91,10 +91,10 @@
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
entry:
- %array = alloca [8192 x i32]
+ %array = alloca [8192 x i32], addrspace(5)
%ptr_offset = add i32 %offset, 4
- %ptr = getelementptr inbounds [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
- store i32 0, i32* %ptr
+ %ptr = getelementptr inbounds [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
+ store i32 0, i32 addrspace(5)* %ptr
ret void
}
@@ -103,10 +103,10 @@
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
entry:
- %array = alloca [8192 x i32]
+ %array = alloca [8192 x i32], addrspace(5)
%ptr_offset = add i32 %offset, 4
- %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
- store i32 0, i32* %ptr
+ %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
+ store i32 0, i32 addrspace(5)* %ptr
ret void
}
@@ -114,11 +114,11 @@
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:20
define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
entry:
- %array = alloca [8192 x i32]
- %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
- store i32 0, i32* %ptr
- %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset
- %val = load i32, i32* %load_ptr
+ %array = alloca [8192 x i32], addrspace(5)
+ %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 4
+ store i32 0, i32 addrspace(5)* %ptr
+ %load_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %offset
+ %val = load i32, i32 addrspace(5)* %load_ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 3e77d87..abc1df0 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -342,10 +342,10 @@
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 2
%shl1 = shl i32 %idx.add, 3
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
@@ -360,10 +360,10 @@
%idx.add = add nuw i32 %idx, 511
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
@@ -377,10 +377,10 @@
%idx.add = add nuw i32 %idx, 256
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index 422049c..32b4f15 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -8,12 +8,12 @@
# CHECK-LABEL: name: expecting_non_empty_interval
# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $exec
-# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 8 into %stack.0, align 4)
+# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
# CHECK-NEXT: dead %2:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $exec
# CHECK: S_NOP 0, implicit %6.sub1
-# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 8 from %stack.0, align 4)
+# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit %8.sub1
# CHECK-NEXT: S_NOP 0, implicit undef %9.sub0
diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
index 322e5ca..ed7e209 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
@@ -1,14 +1,14 @@
; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1
+declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i1) #1
; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
; GCN: ; ScratchSize: 4294967296
define amdgpu_kernel void @stack_size_limit() #0 {
entry:
- %alloca = alloca [1073741823 x i32], align 4
- %bc = bitcast [1073741823 x i32]* %alloca to i8*
- call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true)
+ %alloca = alloca [1073741823 x i32], align 4, addrspace(5)
+ %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i1 true)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
index ed5db1f..68c8b31 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
@@ -9,11 +9,11 @@
# CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
# CHECK-NEXT: stack-id: 1,
-# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0)
-# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0)
+# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (store 4 into %stack.1)
-# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (load 4 from %stack.1)
+# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (store 4 into %stack.1, addrspace 5)
+# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (load 4 from %stack.1, addrspace 5)
name: no_merge_sgpr_vgpr_spill_slot
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
index d988ea3..2d6c72b 100644
--- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
@@ -187,11 +187,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store i16 %hi, i16 addrspace(4)* %out
+ store i16 %hi, i16* %out
ret void
}
@@ -205,11 +205,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x half>
%hi = extractelement <2 x half> %value, i32 1
- store half %hi, half addrspace(4)* %out
+ store half %hi, half* %out
ret void
}
@@ -223,11 +223,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
+define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
- store i16 %hi, i16 addrspace(4)* %out
+ store i16 %hi, i16* %out
ret void
}
@@ -241,12 +241,12 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8 addrspace(4)* %out
+ store i8 %trunc, i8* %out
ret void
}
@@ -260,11 +260,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
+define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
- store i8 %hi, i8 addrspace(4)* %out
+ store i8 %hi, i8* %out
ret void
}
@@ -278,12 +278,12 @@
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
- store i16 %hi, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %out, i64 2047
+ store i16 %hi, i16* %gep
ret void
}
@@ -297,12 +297,12 @@
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
- store i16 %hi, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %out, i64 -1023
+ store i16 %hi, i16* %gep
ret void
}
@@ -316,13 +316,13 @@
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
- store i8 %trunc, i8 addrspace(4)* %gep
+ %gep = getelementptr inbounds i8, i8* %out, i64 4095
+ store i8 %trunc, i8* %gep
ret void
}
@@ -337,13 +337,13 @@
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
- store i8 %trunc, i8 addrspace(4)* %gep
+ %gep = getelementptr inbounds i8, i8* %out, i64 -4095
+ store i8 %trunc, i8* %gep
ret void
}
@@ -357,12 +357,12 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store i16 %hi, i16* %out
+ store i16 %hi, i16 addrspace(5)* %out
ret void
}
@@ -376,12 +376,12 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
+define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x half>
%hi = extractelement <2 x half> %value, i32 1
- store half %hi, half* %out
+ store half %hi, half addrspace(5)* %out
ret void
}
@@ -395,11 +395,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
+define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
- store i16 %hi, i16* %out
+ store i16 %hi, i16 addrspace(5)* %out
ret void
}
@@ -413,12 +413,12 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8* %out
+ store i8 %trunc, i8 addrspace(5)* %out
ret void
}
@@ -432,11 +432,11 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
+define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
- store i8 %hi, i8* %out
+ store i8 %hi, i8 addrspace(5)* %out
ret void
}
@@ -449,12 +449,12 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_max_offset(i16* byval %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16* %out, i64 2045
- store i16 %hi, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2045
+ store i16 %hi, i16 addrspace(5)* %gep
ret void
}
@@ -475,7 +475,7 @@
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store volatile i16 %hi, i16* null
+ store volatile i16 %hi, i16 addrspace(5)* null
ret void
}
@@ -495,7 +495,7 @@
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store volatile i8 %trunc, i8* null
+ store volatile i8 %trunc, i8 addrspace(5)* null
ret void
}
@@ -599,14 +599,14 @@
; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- store i16 %hi, i16* %gep
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ store i16 %hi, i16 addrspace(5)* %gep
ret void
}
@@ -616,15 +616,15 @@
; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8* %gep
+ store i8 %trunc, i8 addrspace(5)* %gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/store-private.ll b/llvm/test/CodeGen/AMDGPU/store-private.ll
index ce7656a..617511a 100644
--- a/llvm/test/CodeGen/AMDGPU/store-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-private.ll
@@ -15,9 +15,9 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i1(i1 addrspace(0)* %out) {
+define amdgpu_kernel void @store_i1(i1 addrspace(5)* %out) {
entry:
- store i1 true, i1 addrspace(0)* %out
+ store i1 true, i1 addrspace(5)* %out
ret void
}
@@ -44,9 +44,9 @@
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i8(i8 addrspace(0)* %out, i8 %in) {
+define amdgpu_kernel void @store_i8(i8 addrspace(5)* %out, i8 %in) {
entry:
- store i8 %in, i8 addrspace(0)* %out
+ store i8 %in, i8 addrspace(5)* %out
ret void
}
@@ -72,9 +72,9 @@
; EG: MOV * T(0 + AR.x).X+, [[RES]]
; SI: buffer_store_short
-define amdgpu_kernel void @store_i16(i16 addrspace(0)* %out, i16 %in) {
+define amdgpu_kernel void @store_i16(i16 addrspace(5)* %out, i16 %in) {
entry:
- store i16 %in, i16 addrspace(0)* %out
+ store i16 %in, i16 addrspace(5)* %out
ret void
}
@@ -102,9 +102,9 @@
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
-define amdgpu_kernel void @store_i24(i24 addrspace(0)* %out, i24 %in) {
+define amdgpu_kernel void @store_i24(i24 addrspace(5)* %out, i24 %in) {
entry:
- store i24 %in, i24 addrspace(0)* %out
+ store i24 %in, i24 addrspace(5)* %out
ret void
}
@@ -120,9 +120,9 @@
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT
-define amdgpu_kernel void @store_i25(i25 addrspace(0)* %out, i25 %in) {
+define amdgpu_kernel void @store_i25(i25 addrspace(5)* %out, i25 %in) {
entry:
- store i25 %in, i25 addrspace(0)* %out
+ store i25 %in, i25 addrspace(5)* %out
ret void
}
@@ -141,10 +141,10 @@
; CM-NOT: MOVA_INT
; SI: buffer_store_short
-define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
- store <2 x i8> %0, <2 x i8> addrspace(0)* %out
+ store <2 x i8> %0, <2 x i8> addrspace(5)* %out
ret void
}
@@ -172,10 +172,10 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
- store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1
+ store <2 x i8> %0, <2 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -191,10 +191,10 @@
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
- store <2 x i16> %0, <2 x i16> addrspace(0)* %out
+ store <2 x i16> %0, <2 x i16> addrspace(5)* %out
ret void
}
@@ -223,10 +223,10 @@
; SI: buffer_store_short
; SI: buffer_store_short
-define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
- store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2
+ store <2 x i16> %0, <2 x i16> addrspace(5)* %out, align 2
ret void
}
@@ -240,10 +240,10 @@
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out
ret void
}
@@ -299,10 +299,10 @@
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -410,10 +410,10 @@
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
+define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(5)* %out, <8 x i32> %in) {
entry:
%0 = trunc <8 x i32> %in to <8 x i8>
- store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1
+ store <8 x i8> %0, <8 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -443,10 +443,10 @@
; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out, align 2
ret void
}
@@ -460,8 +460,8 @@
; SI: buffer_store_dword
-define amdgpu_kernel void @store_f32(float addrspace(0)* %out, float %in) {
- store float %in, float addrspace(0)* %out
+define amdgpu_kernel void @store_f32(float addrspace(5)* %out, float %in) {
+ store float %in, float addrspace(5)* %out
ret void
}
@@ -480,10 +480,10 @@
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
- store <4 x i16> %0, <4 x i16> addrspace(0)* %out
+ store <4 x i16> %0, <4 x i16> addrspace(5)* %out
ret void
}
@@ -504,11 +504,11 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
+define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(5)* %out, float %a, float %b) {
entry:
%0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
%1 = insertelement <2 x float> %0, float %b, i32 1
- store <2 x float> %1, <2 x float> addrspace(0)* %out
+ store <2 x float> %1, <2 x float> addrspace(5)* %out
ret void
}
@@ -533,8 +533,8 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
- store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16
+define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(5)* %out, <3 x i32> %a) nounwind {
+ store <3 x i32> %a, <3 x i32> addrspace(5)* %out, align 16
ret void
}
@@ -563,9 +563,9 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(5)* %out, <4 x i32> %in) {
entry:
- store <4 x i32> %in, <4 x i32> addrspace(0)* %out
+ store <4 x i32> %in, <4 x i32> addrspace(5)* %out
ret void
}
@@ -594,9 +594,9 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(5)* %out, <4 x i32> %in) {
entry:
- store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4
+ store <4 x i32> %in, <4 x i32> addrspace(5)* %out, align 4
ret void
}
@@ -626,9 +626,9 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
- %1 = load <4 x float>, <4 x float> addrspace(0) * %in
- store <4 x float> %1, <4 x float> addrspace(0)* %out
+define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(5)* %out, <4 x float> addrspace(5)* %in) {
+ %1 = load <4 x float>, <4 x float> addrspace(5)* %in
+ store <4 x float> %1, <4 x float> addrspace(5)* %out
ret void
}
@@ -644,10 +644,10 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i8(i8 addrspace(5)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
- store i8 %0, i8 addrspace(0)* %out
+ store i8 %0, i8 addrspace(5)* %out
ret void
}
@@ -663,10 +663,10 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_short
-define amdgpu_kernel void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i16(i16 addrspace(5)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
- store i16 %0, i16 addrspace(0)* %out
+ store i16 %0, i16 addrspace(5)* %out
ret void
}
@@ -689,14 +689,14 @@
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
- store i32 %0, i32 addrspace(0)* %out, align 4
- %arrayidx1 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
- store i32 %1, i32 addrspace(0)* %arrayidx1, align 4
+ store i32 %0, i32 addrspace(5)* %out, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
+ store i32 %1, i32 addrspace(5)* %arrayidx1, align 4
ret void
}
@@ -727,15 +727,15 @@
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @i128-const-store(i32 addrspace(0)* %out) {
+define amdgpu_kernel void @i128-const-store(i32 addrspace(5)* %out) {
entry:
- store i32 1, i32 addrspace(0)* %out, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
- store i32 1, i32 addrspace(0)* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 2
- store i32 2, i32 addrspace(0)* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 3
- store i32 2, i32 addrspace(0)* %arrayidx6, align 4
+ store i32 1, i32 addrspace(5)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
+ store i32 1, i32 addrspace(5)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 2
+ store i32 2, i32 addrspace(5)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 3
+ store i32 2, i32 addrspace(5)* %arrayidx6, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/store-vector-ptrs.ll b/llvm/test/CodeGen/AMDGPU/store-vector-ptrs.ll
index 26f8bde..a2146b8 100644
--- a/llvm/test/CodeGen/AMDGPU/store-vector-ptrs.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-vector-ptrs.ll
@@ -5,8 +5,8 @@
; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
; scratch loads and stores.
; CHECK-LABEL: {{^}}store_vector_ptrs:
-define amdgpu_kernel void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
- %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- store <4 x i32*> %p, <4 x i32*>* %out
+define amdgpu_kernel void @store_vector_ptrs(<4 x i32 addrspace(5)*> addrspace(5)* %out, <4 x [1024 x i32] addrspace(5)*> %array) nounwind {
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(5)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ store <4 x i32 addrspace(5)*> %p, <4 x i32 addrspace(5)*> addrspace(5)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/syncscopes.ll b/llvm/test/CodeGen/AMDGPU/syncscopes.ll
index 8600753..83cf6d4 100644
--- a/llvm/test/CodeGen/AMDGPU/syncscopes.ll
+++ b/llvm/test/CodeGen/AMDGPU/syncscopes.ll
@@ -1,19 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s
; GCN-LABEL: name: syncscopes
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
define void @syncscopes(
i32 %agent,
- i32 addrspace(4)* %agent_out,
+ i32* %agent_out,
i32 %workgroup,
- i32 addrspace(4)* %workgroup_out,
+ i32* %workgroup_out,
i32 %wavefront,
- i32 addrspace(4)* %wavefront_out) {
+ i32* %wavefront_out) {
entry:
- store atomic i32 %agent, i32 addrspace(4)* %agent_out syncscope("agent") seq_cst, align 4
- store atomic i32 %workgroup, i32 addrspace(4)* %workgroup_out syncscope("workgroup") seq_cst, align 4
- store atomic i32 %wavefront, i32 addrspace(4)* %wavefront_out syncscope("wavefront") seq_cst, align 4
+ store atomic i32 %agent, i32* %agent_out syncscope("agent") seq_cst, align 4
+ store atomic i32 %workgroup, i32* %workgroup_out syncscope("workgroup") seq_cst, align 4
+ store atomic i32 %wavefront, i32* %wavefront_out syncscope("wavefront") seq_cst, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/target-cpu.ll b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
index 466e89e..d0ddd2a 100644
--- a/llvm/test/CodeGen/AMDGPU/target-cpu.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
@@ -81,10 +81,10 @@
; CHECK: ; LDSByteSize: 5120
define amdgpu_kernel void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
- %load = load i32, i32* %arrayidx1
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ %load = load i32, i32 addrspace(5)* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -95,10 +95,10 @@
; CHECK: ScratchSize: 24
define amdgpu_kernel void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
- %load = load i32, i32* %arrayidx1
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ %load = load i32, i32 addrspace(5)* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 8b74e02..bea8d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -48,7 +48,7 @@
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
- SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+ SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
@@ -66,7 +66,7 @@
bb.2:
successors:
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
@@ -82,7 +82,7 @@
# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder
# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
-# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
# CHECK: $exec = COPY killed $sgpr2_sgpr3
name: undefined_physreg_sgpr_spill_reorder
alignment: 0
@@ -112,7 +112,7 @@
$vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
- SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+ SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_BRANCH %bb.1
@@ -129,7 +129,7 @@
bb.2:
successors:
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir b/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
index a635bd8..548376d 100644
--- a/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
@@ -2,32 +2,32 @@
--- |
define amdgpu_kernel void @fold_fi_vgpr() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_vgpr_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_sgpr_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_fi_sgpr() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_fi_imm() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_imm_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
index 813255a..6df033f 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
@@ -10,13 +10,13 @@
; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0)
; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @test(i32 addrspace(4)* %out, i32 %in) {
- store volatile i32 0, i32 addrspace(4)* %out
- %val = load volatile i32, i32 addrspace(4)* %out
+define amdgpu_kernel void @test(i32* %out, i32 %in) {
+ store volatile i32 0, i32* %out
+ %val = load volatile i32, i32* %out
ret void
}
-; Make sure lgkmcnt isn't used for global_* instructions
+; Make sure lgkmcnt isn't used for global_* instructions
; GCN-LABEL: {{^}}test_waitcnt_type_flat_global:
; GFX9: global_load_dword [[LD:v[0-9]+]]
; GFX9-NEXT: s_waitcnt vmcnt(0){{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
index 2a3ce4d..f71a568 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
@@ -17,8 +17,8 @@
define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 {
bb:
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
br label %bb18
bb1: ; preds = %bb18
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
index 0ffd543..4c0a056 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
@@ -3,8 +3,8 @@
--- |
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
<4 x i32> addrspace(1)* %global16,
- i32 addrspace(4)* %flat4,
- <4 x i32> addrspace(4)* %flat16) {
+ i32* %flat4,
+ <4 x i32>* %flat16) {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 166901f..73265e8 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -657,17 +657,17 @@
; CHECK: buffer_store_dwordx4
define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
entry:
- %array = alloca [32 x i32], align 4
+ %array = alloca [32 x i32], align 4, addrspace(5)
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
- %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
- store volatile i32 %a, i32* %s.gep, align 4
+ %s.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 0
+ store volatile i32 %a, i32 addrspace(5)* %s.gep, align 4
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
- %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
- %c = load i32, i32* %c.gep, align 4
+ %c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
+ %c = load i32, i32 addrspace(5)* %c.gep, align 4
%c.bc = bitcast i32 %c to float
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
diff --git a/llvm/test/DebugInfo/AMDGPU/code-pointer-size.ll b/llvm/test/DebugInfo/AMDGPU/code-pointer-size.ll
index 4e9d26b..8be11c7 100644
--- a/llvm/test/DebugInfo/AMDGPU/code-pointer-size.ll
+++ b/llvm/test/DebugInfo/AMDGPU/code-pointer-size.ll
@@ -4,11 +4,11 @@
;
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
-; kernel void kernel1(global int *A) {
+; kernel void kernel1(global int *A) {
; *A = 11;
; }
;
-; kernel void kernel2(global int *B) {
+; kernel void kernel2(global int *B) {
; *B = 12;
; }
@@ -20,20 +20,20 @@
define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19
store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
ret void, !dbg !21
}
define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
entry:
- %B.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ %B.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %B, i32 addrspace(1)* addrspace(5)* %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %B.addr, align 4, !dbg !25
store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
ret void, !dbg !27
}
@@ -57,7 +57,7 @@
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !{i32 1}
!13 = !{!"none"}
-!14 = !{!"int*"}
+!14 = !{!"int*"}
!15 = !{!""}
!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
diff --git a/llvm/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll b/llvm/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
index 19ca08a..bc8548f 100644
--- a/llvm/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
+++ b/llvm/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
@@ -4,11 +4,11 @@
;
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
-; kernel void kernel1(global int *A) {
+; kernel void kernel1(global int *A) {
; *A = 11;
; }
;
-; kernel void kernel2(global int *B) {
+; kernel void kernel2(global int *B) {
; *B = 12;
; }
@@ -19,20 +19,20 @@
define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19
store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
ret void, !dbg !21
}
define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
entry:
- %B.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ %B.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %B, i32 addrspace(1)* addrspace(5)* %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %B.addr, align 4, !dbg !25
store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
ret void, !dbg !27
}
@@ -56,7 +56,7 @@
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !{i32 1}
!13 = !{!"none"}
-!14 = !{!"int*"}
+!14 = !{!"int*"}
!15 = !{!""}
!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
diff --git a/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll b/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll
index 2cb0b01..6a3dfc5 100644
--- a/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll
+++ b/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll
@@ -5,11 +5,11 @@
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
; kernel void kernel1() {
-; global int *FuncVar0 = 0;
-; constant int *FuncVar1 = 0;
-; local int *FuncVar2 = 0;
-; private int *FuncVar3 = 0;
-; int *FuncVar4 = 0;
+; global int *FuncVar0 = 0;
+; constant int *FuncVar1 = 0;
+; local int *FuncVar2 = 0;
+; private int *FuncVar3 = 0;
+; int *FuncVar4 = 0;
; }
; CHECK: DW_AT_name {{.*}}"FuncVar0"
@@ -53,21 +53,21 @@
define amdgpu_kernel void @kernel1() !dbg !7 {
entry:
- %FuncVar0 = alloca i32 addrspace(1)*, align 4
- %FuncVar1 = alloca i32 addrspace(2)*, align 4
- %FuncVar2 = alloca i32 addrspace(3)*, align 4
- %FuncVar3 = alloca i32*, align 4
- %FuncVar4 = alloca i32 addrspace(4)*, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %FuncVar0, metadata !10, metadata !13), !dbg !14
- store i32 addrspace(1)* null, i32 addrspace(1)** %FuncVar0, align 4, !dbg !14
- call void @llvm.dbg.declare(metadata i32 addrspace(2)** %FuncVar1, metadata !15, metadata !13), !dbg !16
- store i32 addrspace(2)* null, i32 addrspace(2)** %FuncVar1, align 4, !dbg !16
- call void @llvm.dbg.declare(metadata i32 addrspace(3)** %FuncVar2, metadata !17, metadata !13), !dbg !19
- store i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)** %FuncVar2, align 4, !dbg !19
- call void @llvm.dbg.declare(metadata i32** %FuncVar3, metadata !20, metadata !13), !dbg !22
- store i32* addrspacecast (i32 addrspace(4)* null to i32*), i32** %FuncVar3, align 4, !dbg !22
- call void @llvm.dbg.declare(metadata i32 addrspace(4)** %FuncVar4, metadata !23, metadata !13), !dbg !24
- store i32 addrspace(4)* null, i32 addrspace(4)** %FuncVar4, align 4, !dbg !24
+ %FuncVar0 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %FuncVar1 = alloca i32 addrspace(2)*, align 4, addrspace(5)
+ %FuncVar2 = alloca i32 addrspace(3)*, align 4, addrspace(5)
+ %FuncVar3 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+ %FuncVar4 = alloca i32*, align 4, addrspace(5)
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %FuncVar0, metadata !10, metadata !13), !dbg !14
+ store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %FuncVar0, align 4, !dbg !14
+ call void @llvm.dbg.declare(metadata i32 addrspace(2)* addrspace(5)* %FuncVar1, metadata !15, metadata !13), !dbg !16
+ store i32 addrspace(2)* null, i32 addrspace(2)* addrspace(5)* %FuncVar1, align 4, !dbg !16
+ call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* %FuncVar2, metadata !17, metadata !13), !dbg !19
+ store i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* addrspace(5)* %FuncVar2, align 4, !dbg !19
+ call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* %FuncVar3, metadata !20, metadata !13), !dbg !22
+ store i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %FuncVar3, align 4, !dbg !22
+ call void @llvm.dbg.declare(metadata i32* addrspace(5)* %FuncVar4, metadata !23, metadata !13), !dbg !24
+ store i32* null, i32* addrspace(5)* %FuncVar4, align 4, !dbg !24
ret void, !dbg !25
}
diff --git a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll
index a97d018..692769b 100644
--- a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll
+++ b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll
@@ -7,7 +7,7 @@
; global int GlobA;
; global int GlobB;
;
-; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
+; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
; ArgA[ArgN] += ArgB[ArgN];
; }
@@ -45,22 +45,22 @@
; CHECK-NEXT: DW_AT_name {{.*}}"ArgB"
i32 addrspace(1)* %ArgB) !dbg !13 {
entry:
- %ArgN.addr = alloca i32, align 4
- %ArgA.addr = alloca i32 addrspace(1)*, align 4
- %ArgB.addr = alloca i32 addrspace(1)*, align 4
- store i32 %ArgN, i32* %ArgN.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24
- store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26
- store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29
- %1 = load i32, i32* %ArgN.addr, align 4, !dbg !30
+ %ArgN.addr = alloca i32, align 4, addrspace(5)
+ %ArgA.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %ArgB.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 %ArgN, i32 addrspace(5)* %ArgN.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(5)* %ArgN.addr, metadata !22, metadata !23), !dbg !24
+ store i32 addrspace(1)* %ArgA, i32 addrspace(1)* addrspace(5)* %ArgA.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %ArgA.addr, metadata !25, metadata !23), !dbg !26
+ store i32 addrspace(1)* %ArgB, i32 addrspace(1)* addrspace(5)* %ArgB.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %ArgB.addr, metadata !27, metadata !23), !dbg !28
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %ArgB.addr, align 4, !dbg !29
+ %1 = load i32, i32 addrspace(5)* %ArgN.addr, align 4, !dbg !30
%idxprom = zext i32 %1 to i64, !dbg !29
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29
- %3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31
- %4 = load i32, i32* %ArgN.addr, align 4, !dbg !32
+ %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %ArgA.addr, align 4, !dbg !31
+ %4 = load i32, i32 addrspace(5)* %ArgN.addr, align 4, !dbg !32
%idxprom1 = zext i32 %4 to i64, !dbg !31
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31
%5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
@@ -94,7 +94,7 @@
!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!18 = !{i32 0, i32 1, i32 1}
!19 = !{!"none", !"none", !"none"}
-!20 = !{!"uint", !"int*", !"int*"}
+!20 = !{!"uint", !"int addrspace(5)*", !"int addrspace(5)*"}
!21 = !{!"", !"", !""}
!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16)
!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
index adeba26..e21392f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
@@ -5,7 +5,7 @@
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -13,7 +13,7 @@
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -21,16 +21,16 @@
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
-define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) {
- %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
+define i64 @no_sink_private_to_flat(i1 %pred, i64 addrspace(5)* %ptr) {
+ %ptr_cast = addrspacecast i64 addrspace(5)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64* %ptr
+ %v1 = load i64, i64 addrspace(5)* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -40,7 +40,7 @@
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -48,7 +48,7 @@
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -56,12 +56,12 @@
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)*
+define i64 @sink_flat_to_global(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(1)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -73,12 +73,12 @@
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)*
+define i64 @sink_flat_to_constant(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(2)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -90,12 +90,12 @@
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)*
+define i64 @sink_flat_to_local(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(3)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -107,15 +107,15 @@
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64*
+define i64 @sink_flat_to_private(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(5)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64* %ptr_cast
+ %v2 = load i64, i64 addrspace(5)* %ptr_cast
ret i64 %v2
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index 1eab707..f70c36a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -3,69 +3,69 @@
; Trivial optimization of generic addressing
; CHECK-LABEL: @load_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define float @load_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
%tmp1 = load float, float addrspace(1)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_constant_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(2)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_constant_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+define float @load_constant_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
%tmp1 = load float, float addrspace(2)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define float @load_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
%tmp1 = load float, float addrspace(3)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: %tmp1 = load float, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- %tmp1 = load float, float* %tmp0
+define float @load_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ %tmp1 = load float, float addrspace(5)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @store_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
-define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
store float 0.0, float addrspace(1)* %tmp0
ret void
}
; CHECK-LABEL: @store_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
-define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
store float 0.0, float addrspace(3)* %tmp0
ret void
}
; CHECK-LABEL: @store_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: store float 0.000000e+00, float* %tmp0
-define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- store float 0.0, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
+define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ store float 0.0, float addrspace(5)* %tmp0
ret void
}
@@ -75,10 +75,10 @@
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
@@ -88,95 +88,95 @@
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
-; CHECK-NEXT: %val = load i32, i32* %input, align 4
-; CHECK-NEXT: store i32 %val, i32* %output, align 4
+; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
+; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; No optimization. flat load/store.
; CHECK-LABEL: @load_store_flat(
-; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4
-; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4
+; CHECK-NEXT: %val = load i32, i32* %input, align 4
+; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 {
- %val = load i32, i32 addrspace(4)* %input, align 4
- store i32 %val, i32 addrspace(4)* %output, align 4
+define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
+ %val = load i32, i32* %input, align 4
+ store i32 %val, i32* %output, align 4
ret void
}
; CHECK-LABEL: @store_addrspacecast_ptr_value(
-; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
-; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
-define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
- %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
+; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
+; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
+define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
+ %cast = addrspacecast i32 addrspace(1)* %input to i32*
+ store i32* %cast, i32* addrspace(1)* %output, align 4
ret void
}
; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
-; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
-; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
-define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 {
- %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
- ret { i32 addrspace(4)*, i1 } %ret
+; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
+ %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+ %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+ ret { i32*, i1 } %ret
}
; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
-; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
- %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+ %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
%conv = zext i1 %tobool to i32
store i32 %conv, i32 addrspace(1)* %results, align 4
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
index b185ede..0a5e7a5 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
@@ -3,57 +3,57 @@
; CHECK-LABEL: @icmp_flat_cmp_self(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.0
define i1 @icmp_flat_cmp_self(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_from_group(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.1
define i1 @icmp_flat_flat_from_group(i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_from_group_private(
-; CHECK: %1 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %2
-define i1 @icmp_mismatch_flat_from_group_private(i32* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %2
+define i1 @icmp_mismatch_flat_from_group_private(i32 addrspace(5)* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_group_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %flat.ptr.1
-define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %flat.ptr.1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %flat.ptr.1
+define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %flat.ptr.1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_group(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %1
-define i1 @icmp_flat_flat_group(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %1
+define i1 @icmp_flat_flat_group(i32* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %flat.ptr.0, %cast1
ret i1 %cmp
}
; Keeping as cmp addrspace(3)* is better
; CHECK-LABEL: @icmp_flat_to_group_cmp(
-; CHECK: %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
-; CHECK: %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+; CHECK: %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+; CHECK: %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
; CHECK: %cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
-define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
- %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+define i1 @icmp_flat_to_group_cmp(i32* %flat.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+ %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
%cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
ret i1 %cmp
}
@@ -62,35 +62,35 @@
; constant cast if this is OK to change if 0 is a valid pointer.
; CHECK-LABEL: @icmp_group_flat_cmp_null(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* null to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, null
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, null
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_constant_inttoptr(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* inttoptr (i64 400 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* inttoptr (i64 400 to i32*) to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_constant_inttoptr(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, inttoptr (i64 400 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, inttoptr (i64 400 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32* null to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(5)* null to i32*)
define i1 @icmp_mismatch_flat_group_private_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* null to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* undef to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* undef to i32*)
ret i1 %cmp
}
@@ -98,62 +98,62 @@
@global0 = internal addrspace(1) global i32 0, align 4
; CHECK-LABEL: @icmp_mismatch_flat_group_global_cmp_gv(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_flat_group_global_cmp_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_group_global_cmp_gv_gv(
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_group_global_cmp_gv_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_group_flat_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, undef
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, undef
ret i1 %cmp
}
; Test non-canonical orders
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %1
define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_group_flat_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* undef, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* undef, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* undef to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* undef to i32*), %cast0
ret i1 %cmp
}
; TODO: Should be handled
; CHECK-LABEL: @icmp_flat_flat_from_group_vector(
-; CHECK: %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+; CHECK: %cmp = icmp eq <2 x i32*> %cast0, %cast1
define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %cmp = icmp eq <2 x i32*> %cast0, %cast1
ret <2 x i1> %cmp
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
index 52067cd..3096d81 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
@@ -30,29 +30,29 @@
; CHECK: ret void
define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
bb:
- %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @use(float %tmp)
- store float %v, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp1 = addrspacecast float addrspace(3)* @scalar to float addrspace(4)*
- %tmp2 = load float, float addrspace(4)* %tmp1, align 4
+ %tmp1 = addrspacecast float addrspace(3)* @scalar to float*
+ %tmp2 = load float, float* %tmp1, align 4
call void @use(float %tmp2)
- store float %v, float addrspace(4)* %tmp1, align 4
+ store float %v, float* %tmp1, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp3 = load float, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @use(float %tmp3)
- store float %v, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5
- %tmp5 = load float, float addrspace(4)* %tmp4, align 4
+ %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %tmp5 = load float, float* %tmp4, align 4
call void @use(float %tmp5)
- store float %v, float addrspace(4)* %tmp4, align 4
+ store float %v, float* %tmp4, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*
- %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* %tmp6, i32 0, i32 %i
- %tmp8 = load float, float addrspace(4)* %tmp7, align 4
+ %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
+ %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i
+ %tmp8 = load float, float* %tmp7, align 4
call void @use(float %tmp8)
- store float %v, float addrspace(4)* %tmp7, align 4
+ store float %v, float* %tmp7, align 4
call void @llvm.amdgcn.s.barrier()
ret void
}
@@ -61,7 +61,7 @@
; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4
define i32 @constexpr_load_int_from_float_lds() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32 addrspace(4)*), align 4
+ %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4
ret i32 %tmp
}
@@ -73,18 +73,18 @@
; CHECK: ret i32 %tmp4
define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 {
bb:
- %tmp = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %tmp1 = getelementptr float, float addrspace(4)* %tmp, i32 %i
- %tmp2 = getelementptr float, float addrspace(4)* %tmp1, i32 %j
- %tmp3 = bitcast float addrspace(4)* %tmp2 to i32 addrspace(4)*
- %tmp4 = load i32, i32 addrspace(4)* %tmp3
+ %tmp = addrspacecast float addrspace(1)* %input to float*
+ %tmp1 = getelementptr float, float* %tmp, i32 %i
+ %tmp2 = getelementptr float, float* %tmp1, i32 %j
+ %tmp3 = bitcast float* %tmp2 to i32*
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
; CHECK-LABEL: @nested_const_expr(
; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
define amdgpu_kernel void @nested_const_expr() #0 {
- store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4
+ store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
ret void
}
@@ -95,10 +95,10 @@
; CHECK-NEXT: ret void
define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
bb:
- %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10
- %v = load float, float addrspace(4)* %addr
- store float %v, float addrspace(4)* %addr
+ %generic_input = addrspacecast float addrspace(1)* %input to float*
+ %addr = getelementptr float, float* %generic_input, i64 10
+ %v = load float, float* %addr
+ store float %v, float* %addr
ret void
}
@@ -119,27 +119,27 @@
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = getelementptr float, float addrspace(4)* %p, i64 10
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = getelementptr float, float* %p, i64 10
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
-@generic_end = external addrspace(1) global float addrspace(4)*
+@generic_end = external addrspace(1) global float*
; CHECK-LABEL: @loop_with_generic_bound(
; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
-; CHECK: %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+; CHECK: %end = load float*, float* addrspace(1)* @generic_end
; CHECK: br label %loop
; CHECK: loop:
@@ -147,21 +147,21 @@
; CHECK: %v = load float, float addrspace(3)* %i
; CHECK: call void @use(float %v)
; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
-; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
-; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end
+; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float*
+; CHECK: %exit_cond = icmp eq float* %0, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop_with_generic_bound() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = load float*, float* addrspace(1)* @generic_end
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
index 74a2595..2d4bf14 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
@@ -8,9 +8,9 @@
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
@@ -21,9 +21,9 @@
; CHECK-NEXT: store i8 8, i8 addrspace(3)* [[CAST]], align 8
; CHECK-NEXT: ret void
define void @addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i8 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i8 addrspace(3)*
store i8 8, i8 addrspace(3)* %asc1, align 8
ret void
}
@@ -33,24 +33,24 @@
; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef
; CHECK-NEXT: ret void
define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef
ret void
}
; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
-; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32*
+; CHECK-NEXT: store volatile i32* %1, i32* addrspace(1)* undef
; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32* %asc0, i32* addrspace(1)* undef
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index e2c255d..f9b788f 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -9,8 +9,8 @@
; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double*), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.000000e+00, double addrspace(3)* %asc, align 8
ret void
}
@@ -19,8 +19,8 @@
; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
@@ -30,27 +30,27 @@
; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1
- %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %gep1 = getelementptr inbounds double, double* %gep0, i64 %idx1
+ %asc = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
; Don't crash
; CHECK-LABEL: @vector_gep(
-; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
+; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
- %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
- %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(4)*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- %p0 = extractelement <4 x i32 addrspace(4)*> %p, i32 0
- %p1 = extractelement <4 x i32 addrspace(4)*> %p, i32 1
- %p2 = extractelement <4 x i32 addrspace(4)*> %p, i32 2
- %p3 = extractelement <4 x i32 addrspace(4)*> %p, i32 3
- store i32 99, i32 addrspace(4)* %p0
- store i32 99, i32 addrspace(4)* %p1
- store i32 99, i32 addrspace(4)* %p2
- store i32 99, i32 addrspace(4)* %p3
+ %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p0 = extractelement <4 x i32*> %p, i32 0
+ %p1 = extractelement <4 x i32*> %p, i32 1
+ %p2 = extractelement <4 x i32*> %p, i32 2
+ %p3 = extractelement <4 x i32*> %p, i32 3
+ store i32 99, i32* %p0
+ store i32 99, i32* %p1
+ store i32 99, i32* %p2
+ store i32 99, i32* %p3
ret void
}
@@ -61,12 +61,12 @@
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
; CHECK-NEXT: ret void
define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc0 = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc0, align 8
- %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
- %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep1 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx1
+ %asc1 = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc1, align 8
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
index ca6138d..723ce41 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -3,143 +3,143 @@
; CHECK-LABEL: @objectsize_group_to_flat_i32(
; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true, i1 false)
define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ %val = call i32 @llvm.objectsize.i32.p0i8(i8* %cast, i1 true, i1 false)
ret i32 %val
}
; CHECK-LABEL: @objectsize_global_to_flat_i64(
; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true, i1 false)
define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
- %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8*
+ %val = call i64 @llvm.objectsize.i64.p0i8(i8* %cast, i1 true, i1 false)
ret i64 %val
}
; CHECK-LABEL: @atomicinc_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false
define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
ret i64 %ret
}
-declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index dd0bbfd..d8987f8 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -3,100 +3,100 @@
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
-; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
-; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8* %src.ptr, i64 %size) #0 {
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %src.group.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(1)* align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dest.global.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8*
+ call void @llvm.memcpy.p0i8.p3i8.i32(i8* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
 ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false)
 ret void
}
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest0, i8* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dest0, i8* align 4 %cast.src, i64 %size, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dest1, i8* align 4 %cast.src, i64 %size, i1 false)
 ret void
}
@@ -104,22 +104,22 @@
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 addrspace(3)* align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 addrspace(4)* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast, i8* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
-; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memmove.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
 ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.p0i8.p3i8.i32(i8* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
index 3231b6c..2080c51 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Regression tests from old HSAIL addrspacecast optimization pass
@@ -14,7 +14,7 @@
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
- %vecload1 = load <2 x double>, <2 x double> addrspace(4)* bitcast (double addrspace(4)* getelementptr ([100 x double], [100 x double] addrspace(4)* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double] addrspace(4)*), i64 0, i64 4) to <2 x double> addrspace(4)*), align 8
+ %vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
@@ -30,7 +30,7 @@
@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4
-declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*)
+declare i32 @_Z9get_fencePv(i8*)
%opencl.pipe_t = type opaque
; This is a compile time assert bug, but we still want to check optimization
@@ -53,24 +53,24 @@
; Should generate flat load
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
-; CHECK: load float, float addrspace(4)*
+; CHECK: load float, float*
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
entry:
- %ptr = alloca float addrspace(4)*, align 8
+ %ptr = alloca float*, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
- store volatile float addrspace(4)* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float addrspace(4)*), float addrspace(4)** %ptr, align 8
- %tmp2 = load volatile float addrspace(4)*, float addrspace(4)** %ptr, align 8
+ store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
+ %tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
- %tmp4 = bitcast float addrspace(4)* %tmp2 to i8 addrspace(4)*
- %call.i = call i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)* %tmp4) #1
+ %tmp4 = bitcast float* %tmp2 to i8*
+ %call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit
if.end.i: ; preds = %entry
- %tmp5 = load float, float addrspace(4)* %tmp2, align 4
+ %tmp5 = load float, float* %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit
@@ -91,14 +91,14 @@
br i1 %cmp1, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
- %tmp = addrspacecast i32 addrspace(3)* %in to i32 addrspace(4)*
+ %tmp = addrspacecast i32 addrspace(3)* %in to i32*
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
- %ptr.02 = phi i32 addrspace(4)* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- store i32 %i.03, i32 addrspace(4)* %ptr.02, align 4
- %add.ptr = getelementptr inbounds i32, i32 addrspace(4)* %ptr.02, i64 4
+ %ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ store i32 %i.03, i32* %ptr.02, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
@@ -116,23 +116,23 @@
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
- %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32 addrspace(4)*
- %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32 addrspace(4)*
+ %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
+ %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp6 = ashr exact i64 %sext, 31
- %tmp7 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 %tmp6
- %arrayidx_v4 = bitcast i32 addrspace(4)* %tmp7 to <2 x i32> addrspace(4)*
- %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4
+ %tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
+ %arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
+ %vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
- %tmp14 = getelementptr inbounds i32, i32 addrspace(4)* %tmp4, i64 %tmp6
+ %tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
- %arrayidx_v41 = bitcast i32 addrspace(4)* %tmp14 to <2 x i32> addrspace(4)*
- store <2 x i32> %tmp16, <2 x i32> addrspace(4)* %arrayidx_v41, align 4
+ %arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
+ store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
ret void
}
@@ -140,4 +140,4 @@
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
\ No newline at end of file
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
index 08edc20..598bb68 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
@@ -4,25 +4,25 @@
; this doesn't do something insane on non-canonical IR.
; CHECK-LABEL: @return_select_group_flat(
-; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK-NEXT: %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
-; CHECK-NEXT: ret i32 addrspace(4)* %select
-define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- ret i32 addrspace(4)* %select
+; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
+; CHECK-NEXT: ret i32* %select
+define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ ret i32* %select
}
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -31,23 +31,23 @@
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
; CHECK: %load = load i32, i32 addrspace(3)* %select
define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1, !prof !0
- %load = load i32, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
+ %load = load i32, i32* %select
ret i32 %load
}
; CHECK-LABEL: @store_select_mismatch_group_private_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2
-; CHECK: store i32 -1, i32 addrspace(4)* %select
-define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* %2
+; CHECK: store i32 -1, i32* %select
+define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -58,35 +58,35 @@
; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
define i32 @constexpr_select_group_flat() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
-; CHECK: %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
define i32 @constexpr_select_group_global_flat_mismatch() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @store_select_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_null_swap(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
+; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* null, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
@@ -94,9 +94,9 @@
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* undef
+ store i32 -1, i32* %select
ret void
}
@@ -104,21 +104,21 @@
; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* undef, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_gep_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
; CHECK: store i32 -1, i32 addrspace(3)* %gep
define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16
- store i32 -1, i32 addrspace(4)* %gep
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ %gep = getelementptr i32, i32* %select, i64 16
+ store i32 -1, i32* %gep
ret void
}
@@ -128,19 +128,19 @@
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
+ store i32 7, i32* %select
ret void
}
@@ -148,114 +148,114 @@
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %1
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
+; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
; CHECK: store i32 7, i32 addrspace(3)* null
define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
ret void
}
@lds2 = external addrspace(3) global [1024 x i32], align 4
; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_vector(
-; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
-; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
-; CHECK: %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
-; CHECK: %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
-; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
-; CHECK: store i32 -1, i32 addrspace(4)* %extract0
-; CHECK: store i32 -2, i32 addrspace(4)* %extract1
+; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
+; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
+; CHECK: store i32 -1, i32* %extract0
+; CHECK: store i32 -2, i32* %extract1
define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
- %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
- %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
- store i32 -1, i32 addrspace(4)* %extract0
- store i32 -2, i32 addrspace(4)* %extract1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+ %extract0 = extractelement <2 x i32*> %select, i32 0
+ %extract1 = extractelement <2 x i32*> %select, i32 1
+ store i32 -1, i32* %extract0
+ store i32 -2, i32* %extract1
ret void
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
index 49467ce..6c9449c 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -3,138 +3,138 @@
; Check that volatile users of addrspacecast are not replaced.
; CHECK-LABEL: @volatile_load_flat_from_global(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_constant(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(2)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_group(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_private(
-; CHECK: load volatile i32, i32 addrspace(4)*
-; CHECK: store i32 %val, i32*
-define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load volatile i32, i32*
+; CHECK: store i32 %val, i32 addrspace(5)*
+define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_private(
-; CHECK: load i32, i32*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load i32, i32 addrspace(5)*
+; CHECK: store volatile i32 %val, i32*
+define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; FIXME: Shouldn't be losing names
; CHECK-LABEL: @volatile_memset_group_to_flat(
-; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
; CHECK-LABEL: @volatile_memset_global_to_flat(
-; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
index 368dc6a..87acb10 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
@@ -1,38 +1,37 @@
-; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- %val0 = load i8, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- %val1 = load i8, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i8, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ %val1 = load i8, i8 addrspace(5)* %ptr1, align 1
%add = add i8 %val0, %val1
store i8 %add, i8 addrspace(1)* %out
ret void
}
; ALL-LABEL: @load_unknown_offset_align1_i16(
-; ALL: alloca [128 x i16], align 1{{$}}
-; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; ALL: alloca [128 x i16], align 1, addrspace(5){{$}}
+; UNALIGNED: load <2 x i16>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- %val0 = load i16, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- %val1 = load i16, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i16, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ %val1 = load i16, i16 addrspace(5)* %ptr1, align 1
%add = add i16 %val0, %val1
store i16 %add, i16 addrspace(1)* %out
ret void
@@ -43,16 +42,16 @@
; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i32, i32* %ptr0, align 1
-; ALIGNED: load i32, i32* %ptr1, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -63,17 +62,17 @@
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 16
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 16, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -81,31 +80,31 @@
; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
-; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
+; ALIGNED: store i8 9, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i8 10, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- store i8 9, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- store i8 10, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i8 9, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ store i8 10, i8 addrspace(5)* %ptr1, align 1
ret void
}
; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
-; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
-; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
+; ALIGNED: store i16 9, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i16 10, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- store i16 9, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- store i16 10, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i16 9, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ store i16 10, i16 addrspace(5)* %ptr1, align 1
ret void
}
@@ -115,16 +114,16 @@
; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i32 9, i32* %ptr0, align 1
-; ALIGNED: store i32 10, i32* %ptr1, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: store i32 10, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- store i32 9, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- store i32 10, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i32 9, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ store i32 10, i32 addrspace(5)* %ptr1, align 1
ret void
}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index 0fcdc7b..4335278 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -5,7 +5,6 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT8,ELT8-UNALIGNED,UNALIGNED,ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT16,ELT16-UNALIGNED,UNALIGNED,ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
; ALIGNED: store i32
@@ -17,52 +16,52 @@
; ELT8-UNALIGNED: store <2 x i32>
; ELT16-UNALIGNED: store <4 x i32>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
- store i32 19, i32* %out.gep.3
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
+ store i32 19, i32 addrspace(5)* %out.gep.3
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align1(
-; ALIGNED: store i32 9, i32* %out, align 1
-; ALIGNED: store i32 1, i32* %out.gep.1, align 1
-; ALIGNED: store i32 23, i32* %out.gep.2, align 1
-; ALIGNED: store i32 19, i32* %out.gep.3, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 1
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 1
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 1
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32>* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32>* %2, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32> addrspace(5)* %1, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32> addrspace(5)* %2, align 1
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
- store i32 19, i32* %out.gep.3, align 1
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align2(
-; ALIGNED: store i32 9, i32* %out, align 2
-; ALIGNED: store i32 1, i32* %out.gep.1, align 2
-; ALIGNED: store i32 23, i32* %out.gep.2, align 2
-; ALIGNED: store i32 19, i32* %out.gep.3, align 2
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 2
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 2
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 2
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 2
; ELT8-UNALIGNED: store <2 x i32>
; ELT8-UNALIGNED: store <2 x i32>
@@ -71,29 +70,29 @@
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
- store i32 9, i32* %out, align 2
- store i32 1, i32* %out.gep.1, align 2
- store i32 23, i32* %out.gep.2, align 2
- store i32 19, i32* %out.gep.3, align 2
+ store i32 9, i32 addrspace(5)* %out, align 2
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 2
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; ALL: store <4 x i8>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
- store i8 9, i8* %out, align 4
- store i8 1, i8* %out.gep.1
- store i8 23, i8* %out.gep.2
- store i8 19, i8* %out.gep.3
+ store i8 9, i8 addrspace(5)* %out, align 4
+ store i8 1, i8 addrspace(5)* %out.gep.1
+ store i8 23, i8 addrspace(5)* %out.gep.2
+ store i8 19, i8 addrspace(5)* %out.gep.3
ret void
}
@@ -103,26 +102,26 @@
; ALIGNED: store i8
; ALIGNED: store i8
-; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
+; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
- store i8 19, i8* %out.gep.3, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
+ store i8 19, i8 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
; ALL: store <2 x i16>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 4
- store i16 12, i16* %out.gep.1
+ store i16 9, i16 addrspace(5)* %out, align 4
+ store i16 12, i16 addrspace(5)* %out.gep.1
ret void
}
@@ -130,12 +129,12 @@
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 2
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 2
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -143,22 +142,22 @@
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 1
- store i16 12, i16* %out.gep.1, align 1
+ store i16 9, i16 addrspace(5)* %out, align 1
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8(
-; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 8
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 8
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 8
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -179,13 +178,13 @@
; ELT16-ALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
ret void
}
@@ -202,13 +201,13 @@
; ELT8-UNALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
ret void
}
@@ -218,13 +217,13 @@
; ALIGNED: store i8
; UNALIGNED: store <3 x i8>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i8 1
- %out.gep.2 = getelementptr i8, i8* %out, i8 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i8 2
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
ret void
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
index 02c3c05..d558aa2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; We need to compile this for a target where we have different address spaces,
@@ -21,9 +20,9 @@
loop:
%idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
- %0 = getelementptr inbounds i32, i32* null, i32 %idx0
+ %0 = getelementptr inbounds i32, i32 addrspace(5)* null, i32 %idx0
%1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0
- store i32 1, i32* %0
+ store i32 1, i32 addrspace(5)* %0
store i32 7, i32 addrspace(1)* %1
%next_idx0 = add nuw nsw i32 %idx0, 1
br label %loop
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
index e986c3d..362001f 100644
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind
@@ -9,13 +9,13 @@
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -27,8 +27,8 @@
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -42,7 +42,7 @@
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.cond2.preheader
@@ -54,8 +54,8 @@
br label %for.body6
for.cond.cleanup: ; preds = %for.cond.cleanup5
- %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp16 = load i32, i32* %arrayidx13, align 4
+ %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp16 = load i32, i32 addrspace(5)* %arrayidx13, align 4
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
ret void
@@ -69,8 +69,8 @@
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
%add = add nsw i32 %j.025, %tmp1
%rem = srem i32 %add, 64
- %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp15, i32* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp15, i32 addrspace(5)* %arrayidx8, align 4
%inc = add nuw nsw i32 %j.025, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
@@ -84,13 +84,13 @@
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [256 x i32], align 4
+ %arr = alloca [256 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -102,8 +102,8 @@
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -118,13 +118,13 @@
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
entry:
- %arr = alloca i32, i32 %n, align 4
+ %arr = alloca i32, i32 %n, align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -136,8 +136,8 @@
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body