LLParser: add an argument for overriding data layout and do not check alloca addr space
Sometimes users do not specify data layout in LLVM assembly and let llc set the
data layout by target triple after loading the LLVM assembly.
Currently the parser checks alloca address space no matter whether the LLVM
assembly contains data layout definition, which causes false alarm since the
default data layout does not contain the correct alloca address space.
The parser also calls verifier to check debug info and updating invalid debug
info. Currently there is no way to let the verifier to check debug info only.
If the verifier finds non-debug-info issues the parser will fail.
For llc, the fix is to remove the check of alloca addr space in the parser and
disable updating debug info, and defer the updating of debug info and
verification to be after setting data layout of the IR by target.
For other llvm tools, since they do not override data layout by target but
instead can override data layout by a command line option, an argument for
overriding data layout is added to the parser. In cases where data layout
overriding is necessary for the parser, the data layout can be provided by
command line.
Differential Revision: https://reviews.llvm.org/D41832
llvm-svn: 323826
diff --git a/llvm/test/CodeGen/AMDGPU/alloca.ll b/llvm/test/CodeGen/AMDGPU/alloca.ll
new file mode 100644
index 0000000..fd42e92
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/alloca.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as -data-layout=A5 < %s | llvm-dis | FileCheck %s
+; RUN: llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s
+; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa-amdgiz
+; RUN: opt -data-layout=A5 -S < %s
+; RUN: llvm-as -data-layout=A5 < %s | opt -S
+
+; CHECK: %tmp = alloca i32, addrspace(5)
+define amdgpu_kernel void @test() {
+ %tmp = alloca i32, addrspace(5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
index f140a76..8c6c17e 100644
--- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
@@ -16,8 +17,8 @@
; GCN-NEXT: s_barrier
; GCN: flat_store_dword
define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
%3 = call i32 @llvm.amdgcn.workitem.id.x()
%4 = zext i32 %3 to i64
%5 = icmp eq i64 %4, 0
@@ -32,7 +33,7 @@
call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4
- %9 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%11 = call i32 @llvm.amdgcn.workitem.id.x()
%12 = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -58,14 +59,14 @@
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: s_barrier
define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- %3 = alloca i32, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
- store i32 0, i32* %3, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
+ store i32 0, i32 addrspace(5)* %3, align 4
br label %4
; <label>:4: ; preds = %58, %1
- %5 = load i32, i32* %3, align 4
+ %5 = load i32, i32 addrspace(5)* %3, align 4
%6 = sext i32 %5 to i64
%7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%8 = call i32 @llvm.amdgcn.workitem.id.x()
@@ -101,8 +102,8 @@
%36 = add i64 %35, %33
%37 = add i64 %36, 2184
%38 = trunc i64 %37 to i32
- %39 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
- %40 = load i32, i32* %3, align 4
+ %39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
+ %40 = load i32, i32 addrspace(5)* %3, align 4
%41 = sext i32 %40 to i64
%42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%43 = call i32 @llvm.amdgcn.workitem.id.x()
@@ -127,9 +128,9 @@
br label %58
; <label>:58: ; preds = %22
- %59 = load i32, i32* %3, align 4
+ %59 = load i32, i32 addrspace(5)* %3, align 4
%60 = add nsw i32 %59, 1
- store i32 %60, i32* %3, align 4
+ store i32 %60, i32 addrspace(5)* %3, align 4
br label %4
; <label>:61: ; preds = %4
@@ -143,9 +144,9 @@
; GCN-NEXT: s_barrier
; GCN: flat_store_dword
define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
- %3 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
+ %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -176,7 +177,7 @@
call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4
- %25 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%27 = call i32 @llvm.amdgcn.workitem.id.x()
%28 = call i32 @llvm.amdgcn.workgroup.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll
new file mode 100644
index 0000000..043ab46
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as -data-layout=A5 < %s 2>&1 | FileCheck -check-prefixes=COMMON,AS %s
+; RUN: not llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa-amdgiz 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: not opt -data-layout=A5 -S < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: llvm-as < %s | not opt -data-layout=A5 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+
+; AS: assembly parsed, but does not verify as correct!
+; COMMON: Allocation instruction pointer not in the stack address space!
+; COMMON: %tmp = alloca i32
+; LLC: error: input module is broken!
+
+define amdgpu_kernel void @test() {
+ %tmp = alloca i32
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index a89011a..bf1fdca 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -14,7 +14,7 @@
define amdgpu_kernel void @sched_dbg_value_crash(i8 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture readonly %arg1, %struct.widget.0 addrspace(1)* nocapture readonly %arg2, %struct.baz addrspace(1)* nocapture readonly %arg3, %struct.snork addrspace(1)* nocapture %arg4) local_unnamed_addr #2 {
bb:
%0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
- %tmp5 = alloca %struct.wombat, align 16
+ %tmp5 = alloca %struct.wombat, align 16, addrspace(5)
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)*
%3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1
@@ -48,9 +48,9 @@
%tmp20 = shl nuw nsw i64 %tmp19, 2
%tmp21 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp20
%tmp22 = bitcast i8 addrspace(1)* %tmp21 to %struct.wombat.1 addrspace(1)*
- %tmp23 = bitcast %struct.wombat* %tmp5 to i8*
- call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull %tmp23) #3
- %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 6
+ %tmp23 = bitcast %struct.wombat addrspace(5)* %tmp5 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
+ %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 6
%tmp25 = getelementptr i32, i32 addrspace(1)* %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
%tmp27 = zext i32 %tmp26 to i64
@@ -103,7 +103,7 @@
%tmp74 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %tmp69, i32 0
%tmp75 = insertelement <4 x float> %tmp74, float %tmp71, i32 1
%tmp76 = insertelement <4 x float> %tmp75, float %tmp73, i32 2
- store <4 x float> %tmp76, <4 x float>* %tmp24, align 16
+ store <4 x float> %tmp76, <4 x float> addrspace(5)* %tmp24, align 16
%tmp77 = fsub float undef, %tmp60
%tmp78 = fsub float undef, %tmp61
%tmp79 = extractelement <4 x float> %tmp66, i32 2
@@ -125,26 +125,26 @@
%tmp94 = call float @llvm.fmuladd.f32(float %tmp92, float %tmp36, float %tmp93)
%tmp95 = call float @llvm.fmuladd.f32(float %tmp48, float undef, float %tmp94)
%tmp96 = fsub float extractelement (<2 x float> fadd (<2 x float> fmul (<2 x float> undef, <2 x float> undef), <2 x float> undef), i64 1), %tmp95
- %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 8, i32 1
- call void @func(float %tmp96, i64 0, i16* nonnull %tmp97) #3
+ %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 8, i32 1
+ call void @func(float %tmp96, i64 0, i16 addrspace(5)* nonnull %tmp97) #3
%tmp984 = bitcast [16 x i8] addrspace(3)* %17 to i8 addrspace(3)*
%tmp99 = getelementptr inbounds %struct.snork, %struct.snork addrspace(1)* %arg4, i64 %tmp12, i32 8, i32 1, i64 0
call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %tmp99, i8 addrspace(3)* %tmp984, i64 16, i32 16, i1 false)
- call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull %tmp23) #3
+ call void @llvm.lifetime.end.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
ret void
}
- declare void @func(float, i64, i16*)
- declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+ declare void @func(float, i64, i16 addrspace(5)*)
+ declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
declare float @llvm.fmuladd.f32(float, float, float) #1
- declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+ declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #0
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1
declare i32 @llvm.amdgcn.workitem.id.z() #1
- declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+ declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0
attributes #0 = { argmemonly nounwind }