AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel
Currently the default C calling convention functions are treated
the same as compute kernels. Make this explicit so the default
calling convention can be changed to a non-kernel.
Converted with perl -pi -e 's/define void/define amdgpu_kernel void/'
on the relevant test directories (and undoing in one place that actually
wanted a non-kernel).
llvm-svn: 298444
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
index 805c0a7..bd6fea5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
@@ -10,7 +10,7 @@
; GCN-HSA: flat_load_dword
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-define void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
%tmp0 = load float, float addrspace(1)* %in
store float %tmp0, float addrspace(1)* %out
@@ -22,7 +22,7 @@
; GCN-HSA: flat_load_dwordx2
; R600: VTX_READ_64
-define void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in
store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
@@ -34,7 +34,7 @@
; GCN-HSA: flat_load_dwordx4
; R600: VTX_READ_128
-define void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in
store <3 x float> %tmp0, <3 x float> addrspace(1)* %out
@@ -46,7 +46,7 @@
; GCN-HSA: flat_load_dwordx4
; R600: VTX_READ_128
-define void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in
store <4 x float> %tmp0, <4 x float> addrspace(1)* %out
@@ -61,7 +61,7 @@
; R600: VTX_READ_128
; R600: VTX_READ_128
-define void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
@@ -83,7 +83,7 @@
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in
store <16 x float> %tmp0, <16 x float> addrspace(1)* %out