[OPENMP][NVPTX]Emit default locations with the correct Exec|Runtime
modes.
If the region is inside target|teams|distribute region, we can emit the
locations with the correct info for execution mode and runtime mode.
Patch adds this ability to the NVPTX codegen to help the optimizer to
produce better code.
llvm-svn: 347583
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 5e7cf7f..4c17361 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -9,12 +9,14 @@
#define HEADER
// Check that the execution mode of all 6 target regions is set to Generic Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l103}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l180}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l290}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l328}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l346}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l311}}_exec_mode = weak constant i8 1
+// CHECK-DAG: [[NONSPMD:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[UNKNOWN:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: {{@__omp_offloading_.+l105}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l182}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l292}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l330}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l348}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l313}}_exec_mode = weak constant i8 1
__thread int id;
@@ -36,7 +38,7 @@
double cn[5][n];
TT<long long, char> d;
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l103}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l105}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -67,7 +69,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l103]]()
+ // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l105]]()
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
@@ -109,7 +111,7 @@
{
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l180}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l182}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -140,7 +142,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l180]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]], i[[SZ:32|64]] [[ID:%[a-zA-Z_]+]])
+ // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l182]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]], i[[SZ:32|64]] [[ID:%[a-zA-Z_]+]])
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]],
// CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]],
// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
@@ -183,7 +185,7 @@
id = aa;
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l290}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l292}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -214,7 +216,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l290]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l292]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]*
@@ -375,7 +377,7 @@
return f;
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+328}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+330}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -406,7 +408,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l328]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l330]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
@@ -461,10 +463,10 @@
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l346}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l348}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[NONSPMD]]
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
// CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
@@ -495,7 +497,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l346]](
+ // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l348]](
// Create local storage for each capture.
// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]*
// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]]
@@ -558,8 +560,8 @@
// CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
- // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[UNKNOWN]]
+ // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
// CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
@@ -584,13 +586,13 @@
// CHECK: icmp ne i8 [[RES]], 0
// CHECK: br i1
- // CHECK: [[RES:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @{{.+}}, i32 [[GTID]])
+ // CHECK: [[RES:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
// CHECK: icmp ne i16 [[RES]], 0
// CHECK: br i1
- // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
+ // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
// CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
- // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
+ // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
// CHECK: br label
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
@@ -614,7 +616,7 @@
// CHECK: ret i32 [[RES]]
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l311}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l313}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -645,7 +647,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l311]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l313]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]