| Matt Arsenault | 06028dd | 2014-05-01 16:37:52 +0000 | [diff] [blame] | 1 | ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s | 
| Matt Arsenault | a98cd6a | 2013-12-19 05:32:55 +0000 | [diff] [blame] | 2 |  | 
|  | 3 | declare i32 @llvm.SI.tid() nounwind readnone | 
|  | 4 | declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate | 
|  | 5 |  | 
|  | 6 | ; The pointer calculation required for the alloca'd array actually needs | 
|  | 7 | ; an add that won't be folded into the addressing, which fails with a | 
|  | 8 | ; 64-bit pointer add. This should work since private pointers should | 
|  | 9 | ; be 32 bits. | 
|  | 10 |  | 
|  | 11 | ; SI-LABEL: @test_private_array_ptr_calc: | 
|  | 12 | ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] | 
| Tom Stellard | 880a80a | 2014-06-17 16:53:14 +0000 | [diff] [blame] | 13 | ; | 
|  | 14 | ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this | 
|  | 15 | ; alloca to a vector.  It currently fails because it does not know how | 
|  | 16 | ; to interpret: | 
|  | 17 | ; getelementptr [4 x i32]* %alloca, i32 1, i32 %b | 
|  | 18 | ; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] | 
| Matt Arsenault | a98cd6a | 2013-12-19 05:32:55 +0000 | [diff] [blame] | 19 | define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { ; Stores %inA[%tid] + %inB[%tid] into the alloca at a variable index, reloads it, and writes it to %out[%tid]. | 
|  | 20 | %alloca = alloca [4 x i32], i32 4, align 16 ; four [4 x i32] arrays, 16-byte aligned | 
|  | 21 | %tid = call i32 @llvm.SI.tid() readnone | 
|  | 22 | %a_ptr = getelementptr i32 addrspace(1)* %inA, i32 %tid | 
|  | 23 | %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid | 
|  | 24 | %a = load i32 addrspace(1)* %a_ptr | 
|  | 25 | %b = load i32 addrspace(1)* %b_ptr | 
|  | 26 | %result = add i32 %a, %b | 
|  | 27 | %alloca_ptr = getelementptr [4 x i32]* %alloca, i32 1, i32 %b ; element %b of the second [4 x i32]; the index is not a constant, so the address needs an add | 
|  | 28 | store i32 %result, i32* %alloca_ptr, align 4 | 
|  | 29 | ; Dummy call separating the store above from the reload of %alloca_ptr below. | 
|  | 30 | call void @llvm.AMDGPU.barrier.local() nounwind noduplicate | 
|  | 31 | %reload = load i32* %alloca_ptr, align 4 | 
|  | 32 | %out_ptr = getelementptr i32 addrspace(1)* %out, i32 %tid | 
|  | 33 | store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 | 
|  | 34 | ret void | 
|  | 35 | } | 
|  | 36 |  |