AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel

Currently the default C calling convention functions are treated
the same as compute kernels. Make this explicit so the default
calling convention can be changed to a non-kernel.

Converted with perl -pi -e 's/define void/define amdgpu_kernel void/'
on the relevant test directories (and undoing in one place that actually
wanted a non-kernel).

llvm-svn: 298444
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop.ll b/llvm/test/CodeGen/AMDGPU/ctpop.ll
index 9692236..a29e72e 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop.ll
@@ -16,7 +16,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   store i32 %ctpop, i32 addrspace(1)* %out, align 4
   ret void
@@ -30,7 +30,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   store i32 %ctpop, i32 addrspace(1)* %out, align 4
@@ -48,7 +48,7 @@
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
-define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
+define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
   %val0 = load i32, i32 addrspace(1)* %in0, align 4
   %val1 = load i32, i32 addrspace(1)* %in1, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@@ -64,7 +64,7 @@
 ; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
-define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
   %val0 = load i32, i32 addrspace(1)* %in0, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
   %add = add i32 %ctpop0, %sval
@@ -79,7 +79,7 @@
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
-define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
   store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
@@ -97,7 +97,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 ; EG: BCNT_INT
-define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
   store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
@@ -123,7 +123,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 ; EG: BCNT_INT
-define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
   %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
   store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
@@ -165,7 +165,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 ; EG: BCNT_INT
-define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
   %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
   store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
@@ -179,7 +179,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 4
@@ -194,7 +194,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 4, %ctpop
@@ -209,7 +209,7 @@
 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
-define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 99999
@@ -225,7 +225,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, %const
@@ -241,7 +241,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %const, %ctpop
@@ -258,7 +258,7 @@
 ; GCN: s_endpgm
 
 ; EG: BCNT_INT
-define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
+define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
   %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
@@ -279,7 +279,7 @@
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 ; EG: BCNT_INT
-define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
+define amdgpu_kernel void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
 entry:
   %tmp0 = icmp eq i32 %cond, 0
   br i1 %tmp0, label %if, label %else