[AMDGPU] Generate range metadata for workitem id
If the workgroup size is known, inform LLVM about the range of values
returned by local ID and local size queries.
Differential Revision: https://reviews.llvm.org/D31804
llvm-svn: 300102
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index 744c1c2..6f5fc6d 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -9,7 +9,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -42,7 +42,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -58,7 +58,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -106,7 +106,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -122,7 +122,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -138,7 +138,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -156,7 +156,7 @@
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
- %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
%ld.64 = load i64, i64 addrspace(1)* %in.gep
@@ -383,5 +383,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() #0
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }