InferAddressSpaces: Fix cloning original addrspacecast

If an addrspacecast needed to be inserted again, this was creating a
clone of the original cast for each user. Just reuse the original
cast, which also avoids losing the value name.
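
The C++ change itself is not part of this test-only diff; as a rough
sketch of the pattern being fixed (the helper name and shape below are
hypothetical, not the actual patch), the idea is to reuse an existing
cast rather than clone it per user:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Hypothetical helper: when the inferred value NewV must be cast
    // back to the original flat pointer type for a user that cannot be
    // rewritten, reuse the original addrspacecast if it already casts
    // NewV, instead of cloning the cast once per user.
    static Value *getOrReuseCastBack(Value *V, Value *NewV,
                                     Instruction *InsertPt) {
      // If V is already an addrspacecast of NewV, keep using it; this
      // also preserves its value name (e.g. %cast0 instead of %1).
      if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V))
        if (ASC->getPointerOperand() == NewV)
          return ASC;
      // Otherwise materialize a single fresh cast at the insert point.
      return new AddrSpaceCastInst(NewV, V->getType(), "", InsertPt);
    }

The test updates below reflect the name preservation: checks that
previously matched anonymous values like %1 now match the original
names such as %cast0.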

llvm-svn: 363562
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
index 0a5e7a5..4829759 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
@@ -18,9 +18,9 @@
 }
 
 ; CHECK-LABEL: @icmp_mismatch_flat_from_group_private(
-; CHECK: %1 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
-; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
-; CHECK: %cmp = icmp eq i32* %1, %2
+; CHECK: %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+; CHECK: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %cast0, %cast1
 define i1 @icmp_mismatch_flat_from_group_private(i32 addrspace(5)* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
   %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
   %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
@@ -29,8 +29,8 @@
 }
 
 ; CHECK-LABEL: @icmp_flat_group_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %cmp = icmp eq i32* %1, %flat.ptr.1
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %cast0, %flat.ptr.1
 define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32* %flat.ptr.1) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
   %cmp = icmp eq i32* %cast0, %flat.ptr.1
@@ -38,8 +38,8 @@
 }
 
 ; CHECK-LABEL: @icmp_flat_flat_group(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
-; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %1
+; CHECK: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %cast1
 define i1 @icmp_flat_flat_group(i32* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
   %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
   %cmp = icmp eq i32* %flat.ptr.0, %cast1
@@ -78,8 +78,8 @@
 }
 
 ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(5)* null to i32*)
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*)
 define i1 @icmp_mismatch_flat_group_private_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
   %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*)
@@ -98,8 +98,8 @@
 @global0 = internal addrspace(1) global i32 0, align 4
 
 ; CHECK-LABEL: @icmp_mismatch_flat_group_global_cmp_gv(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(1)* @global0 to i32*)
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*)
 define i1 @icmp_mismatch_flat_group_global_cmp_gv(i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
   %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*)
@@ -123,8 +123,8 @@
 
 ; Test non-canonical orders
 ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %1
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0
 define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
   %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
index 2d4bf14..e1bc925 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
@@ -41,8 +41,8 @@
 }
 
 ; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32*
-; CHECK-NEXT: store volatile i32* %1, i32* addrspace(1)* undef
+; CHECK: %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+; CHECK-NEXT: store volatile i32* %asc0, i32* addrspace(1)* undef
 ; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
 ; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
 ; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
index 4694a1c..9f574fa 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -81,8 +81,8 @@
 }
 
 ; CHECK-LABEL: @volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
 define i64 @volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
   %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
   %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
@@ -90,8 +90,8 @@
 }
 
 ; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
 define i32 @volatile_atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
   %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
   %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
@@ -99,8 +99,8 @@
 }
 
 ; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
 define i32 @volatile_atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
   %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
   %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
@@ -108,8 +108,8 @@
 }
 
 ; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
 define i64 @volatile_atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
   %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
   %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
@@ -117,8 +117,8 @@
 }
 
 ; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
 define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
   %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
   %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/redundant-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/redundant-addrspacecast.ll
new file mode 100644
index 0000000..73d8039
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/redundant-addrspacecast.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+%0 = type { i8, i8, i8 }
+
+; Make sure there is only one addrspacecast. The original cast should
+; not be cloned to satisfy the second user.
+define void @bar(%0 addrspace(1)* %orig.ptr) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ORIG_CAST:%.*]] = addrspacecast [[TMP0:%.*]] addrspace(1)* [[ORIG_PTR:%.*]] to %0*
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ORIG_CAST]], i64 0, i32 1
+; CHECK-NEXT:    call void @foo(i8* [[GEP0]])
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ORIG_CAST]], i64 0, i32 2
+; CHECK-NEXT:    call void @foo(i8* [[GEP1]])
+; CHECK-NEXT:    ret void
+;
+bb:
+  %orig.cast = addrspacecast %0 addrspace(1)* %orig.ptr to %0*
+  %gep0 = getelementptr inbounds %0, %0* %orig.cast, i64 0, i32 1
+  call void @foo(i8* %gep0)
+  %gep1 = getelementptr inbounds %0, %0* %orig.cast, i64 0, i32 2
+  call void @foo(i8* %gep1)
+  ret void
+}
+
+declare void @foo(i8*)
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
index 598bb68..1fa4bdc 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
@@ -39,9 +39,9 @@
 }
 
 ; CHECK-LABEL: @store_select_mismatch_group_private_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %2 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
-; CHECK: %select = select i1 %c, i32* %1, i32* %2
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+; CHECK: %select = select i1 %c, i32* %cast0, i32* %cast1
 ; CHECK: store i32 -1, i32* %select
 define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
@@ -155,8 +155,8 @@
 }
 
 ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
 ; CHECK: store i32 7, i32* %select
 define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
@@ -166,8 +166,8 @@
 }
 
 ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %1
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
 ; CHECK: store i32 7, i32* %select
 define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
@@ -230,8 +230,8 @@
 @lds2 = external addrspace(3) global [1024 x i32], align 4
 
 ; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
-; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
 ; CHECK: store i32 7, i32* %select
 define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
   %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
index 5f21489..3394fe6 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -115,10 +115,9 @@
   ret { i32, i1 } %ret
 }
 
-; FIXME: Shouldn't be losing names
 ; CHECK-LABEL: @volatile_memset_group_to_flat(
-; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
 define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
   %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
@@ -126,8 +125,8 @@
 }
 
 ; CHECK-LABEL: @volatile_memset_global_to_flat(
-; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
 define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
   %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)