[NVPTXFavorNonGenericAddrSpaces] recursively trace into GEP and BitCast

Summary:
This patch allows NVPTXFavorNonGenericAddrSpaces to remove addrspacecast from longer chains consisting of GEPs and BitCasts. For example, it can now optimize

  %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
  %1 = gep [10 x float]* %0, i64 0, i64 %i
  %2 = bitcast float* %1 to i32*
  %3 = load i32* %2 ; emits ld.u32

to

  %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
  %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
  %2 = load i32 addrspace(3)* %1 ; emits ld.shared.u32

Test Plan: @ld_int_from_global_float in access-non-generic.ll

Reviewers: broune, eliben, jholewinski, meheff

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D10074

llvm-svn: 238574

commit: 995dde27995e3f957966fd52d721d65d60bda43b [log] [tgz]
author: Jingyue Wu <jingyue@google.com> Fri May 29 17:00:27 2015 +0000
committer: Jingyue Wu <jingyue@google.com> Fri May 29 17:00:27 2015 +0000
tree: 07a44f22f2948de96cd0013a08b6a9382661923f
parent: a84feb17274882ad0830f5dc538307942db155ab [diff] [blame]
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index e709302..5deefe8 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll

@@ -85,6 +85,22 @@
   ret i32 %1
 }
 
+define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
+; IR-LABEL: @ld_int_from_global_float(
+; PTX-LABEL: ld_int_from_global_float(
+  %1 = addrspacecast float addrspace(1)* %input to float*
+  %2 = getelementptr float, float* %1, i32 %i
+; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
+  %3 = getelementptr float, float* %2, i32 %j
+; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
+  %4 = bitcast float* %3 to i32*
+; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
+  %5 = load i32, i32* %4
+; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
+; PTX-LABEL: ld.global
+  ret i32 %5
+}
+
 declare void @llvm.cuda.syncthreads() #3
 
 attributes #3 = { noduplicate nounwind }
commit	995dde27995e3f957966fd52d721d65d60bda43b	[log] [tgz]
author	Jingyue Wu <jingyue@google.com>	Fri May 29 17:00:27 2015 +0000
committer	Jingyue Wu <jingyue@google.com>	Fri May 29 17:00:27 2015 +0000
tree	07a44f22f2948de96cd0013a08b6a9382661923f
parent	a84feb17274882ad0830f5dc538307942db155ab [diff] [blame]