[OpenMP] Add a new version of the SPMD deinit kernel function
Summary: This patch adds a new runtime function for SPMD kernel deinitialization, __kmpc_spmd_kernel_deinit_v2, which replaces the previous __kmpc_spmd_kernel_deinit function. The new function takes as an argument a flag that signals whether the OpenMP runtime is required. This enables the compiler to optimize out the parts of the deinit function that are not needed.
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: jholewinski, guansong, cfe-commits
Differential Revision: https://reviews.llvm.org/D54970
llvm-svn: 347915
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index def3ba7..d4b599e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -33,8 +33,8 @@
/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// Call to void __kmpc_spmd_kernel_deinit();
- OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
+ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function, int16_t
/// IsOMPRuntimeInitialized);
@@ -1413,8 +1413,11 @@
CGF.EmitBlock(OMPDeInitBB);
// DeInitialize the OMP state in the runtime; called by all active threads.
+ llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(EST.ExitBB);
@@ -1597,11 +1600,12 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
- case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
- // Build void __kmpc_spmd_kernel_deinit();
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+ // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
index bbde7bc..7964d76 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -68,7 +68,7 @@
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[EXIT]]
@@ -111,7 +111,7 @@
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[EXIT]]
diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index 905487f..4b32f9d 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -57,7 +57,7 @@
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[EXIT]]
@@ -79,7 +79,7 @@
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[EXIT]]
@@ -100,7 +100,7 @@
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[EXIT]]
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index 1687c8e..965ff9e 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -60,7 +60,7 @@
//
// CHECK: [[EXECUTE]]
// CHECK: {{call|invoke}} void [[PFN:@.+]](i32*
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
//
//
// define internal void [[PFN]](
@@ -254,7 +254,7 @@
//
// CHECK: [[EXECUTE]]
// CHECK: {{call|invoke}} void [[PFN1:@.+]](i32*
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
//
//
// define internal void [[PFN1]](
@@ -527,7 +527,7 @@
//
// CHECK: [[EXECUTE]]
// CHECK: {{call|invoke}} void [[PFN2:@.+]](i32*
- // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
//
//
// define internal void [[PFN2]](
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index c62d254..33c8b06 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -234,7 +234,7 @@
// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: ret
// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 7931571..0c06a60 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -84,7 +84,7 @@
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -98,7 +98,7 @@
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL2]](
@@ -112,7 +112,7 @@
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL3]](
@@ -204,7 +204,7 @@
// CHECK: [[DIST_INNER_LOOP_END]]:
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL4]](
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index 9464475..f7ce262 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -32,7 +32,7 @@
// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}})
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index 2e9ceb1..5a828af 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -79,7 +79,7 @@
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -93,7 +93,7 @@
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL2]](
@@ -107,7 +107,7 @@
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL3]](
@@ -123,7 +123,7 @@
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL4]](
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index d0a31b0..97a1288 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -98,6 +98,7 @@
// CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l43]](
//
// CHECK: {{call|invoke}} void [[T2]]_worker()
+
//
// CHECK: call void @__kmpc_kernel_init(
//
@@ -176,7 +177,7 @@
//
// CHECK: [[EXIT]]
// call void @__kmpc_restore_team_static_memory(i16 1)
- // CHECK: call void @__kmpc_spmd_kernel_deinit(
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i16* dereferenceable{{.+}})
//
@@ -286,7 +287,6 @@
// CHECK: [[MAXV:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ]
// CHECK: store i16 [[MAXV]], i16* [[VAR2_LHS]],
// CHECK: ret void
-
//
// Shuffle and reduce function
// CHECK: define internal void [[PAR_SHUFFLE_REDUCE_FN]](i8*, i16 {{.*}}, i16 {{.*}}, i16 {{.*}})